In [1]:
import json
import os

import folium
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from pandas import json_normalize
from sklearn.cluster import KMeans

# Lift pandas' display truncation: show every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

#### Loading Municipal Data

In [2]:
# Read the municipal table and discard its 'Unnamed: 0' column.
munfile = r'municipal.csv'
mun_on = pd.read_csv(munfile).drop(columns=['Unnamed: 0'])

In [3]:
# Preview the first rows of the municipal table.
mun_on.head()

Unnamed: 0,Municipality,Population,Area,Density
0,Toronto,2731571,630.2,4334.5
1,Ottawa,934243,2790.3,334.8
2,Mississauga,721599,292.43,2467.6
3,Brampton,593638,266.36,2228.7
4,Hamilton,536917,1117.29,480.6


#### Loading City with Covid Data

In [4]:
# Read the per-city coordinates table and discard its 'Unnamed: 0' column.
covcityfile = r'cov_city.csv'
cov_city = pd.read_csv(covcityfile).drop(columns=['Unnamed: 0'])

In [5]:
# [mean latitude, mean longitude] over all cities; used to centre the map.
cov_city_loc = [cov_city[axis].mean() for axis in ('Latitude', 'Longitude')]

In [6]:
# Base map centred on the mean city coordinates.
map_on = folium.Map(location=list(cov_city_loc[:2]), zoom_start=6)

## taken from lab
# One blue circle per city, with the city name as its popup.
city_points = zip(cov_city['Latitude'], cov_city['Longitude'], cov_city['Reporting PHU City'])
for lat, lng, mun in city_points:
    marker = folium.CircleMarker(
        [lat, lng],
        radius=6,
        popup=folium.Popup('{}'.format(mun), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.5,
        parse_html=False)
    marker.add_to(map_on)

map_on

#### Let's look at the Full Covid-19 Positive Cases Data

In [7]:
# Load the case-level file of positive Covid-19 cases and preview it.
cov_pos_file = r'conposcovidloc.csv'
cov_pos = pd.read_csv(cov_pos_file)
cov_pos.head()

Unnamed: 0,Row_ID,Accurate_Episode_Date,Age_Group,Client_Gender,Case_AcquisitionInfo,Outcome1,Reporting_PHU,Reporting_PHU_Address,Reporting_PHU_City,Reporting_PHU_Postal_Code,Reporting_PHU_Website,Reporting_PHU_Latitude,Reporting_PHU_Longitude
0,1,2020-01-22,50s,FEMALE,Travel-Related,Resolved,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
1,2,2020-01-21,50s,MALE,Travel-Related,Resolved,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
2,3,2020-01-24,20s,FEMALE,Travel-Related,Resolved,Middlesex-London Health Unit,50 King Street,London,N6A 5L7,www.healthunit.com,42.981468,-81.254016
3,4,2020-02-05,20s,FEMALE,Travel-Related,Resolved,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358
4,5,2020-02-16,60s,FEMALE,Travel-Related,Resolved,Toronto Public Health,"277 Victoria Street, 5th Floor",Toronto,M5B 1W2,www.toronto.ca/community-people/health-wellnes...,43.656591,-79.379358


In [8]:
# Keep only the fields used later, indexed by Row_ID, under shorter column names.
case_column_names = {
    'Accurate_Episode_Date': 'Date',
    'Case_AcquisitionInfo': 'Infection Source',
    'Outcome1': 'Outcome',
    'Reporting_PHU_City': 'City',
}
cov_pos_fil = (
    cov_pos[['Row_ID', *case_column_names]]
    .set_index(['Row_ID'])
    .rename(columns=case_column_names)
)

In [9]:
# Preview the first ten filtered case records.
cov_pos_fil.head(10)

Unnamed: 0_level_0,Date,Infection Source,Outcome,City
Row_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2020-01-22,Travel-Related,Resolved,Toronto
2,2020-01-21,Travel-Related,Resolved,Toronto
3,2020-01-24,Travel-Related,Resolved,London
4,2020-02-05,Travel-Related,Resolved,Toronto
5,2020-02-16,Travel-Related,Resolved,Toronto
6,2020-02-20,Contact of a confirmed case,Resolved,Toronto
7,2020-02-24,Travel-Related,Resolved,Toronto
8,2020-02-25,Travel-Related,Resolved,Newmarket
9,2020-02-20,Travel-Related,Resolved,Toronto
10,2020-02-24,Travel-Related,Resolved,Whitby


#### Let's work on the Foursquare data

In [303]:
# Foursquare credentials are taken from the environment so that real keys never have to be
# typed into (or saved with) the notebook; the placeholders apply when a variable is unset.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'yourcode') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'yoursecret') # your Foursquare Secret
VERSION = os.environ.get('FOURSQUARE_API_VERSION', 'yourversion') # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret: cell output is stored inside the notebook file.
print('CLIENT_SECRET: <hidden>')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [None]:
# Repeated credential setup. Values come from the environment (placeholders when unset)
# so that real keys are never saved with the notebook.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'id') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'secret') # your Foursquare Secret
VERSION = os.environ.get('FOURSQUARE_API_VERSION', 'version') # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret: cell output is stored inside the notebook file.
print('CLIENT_SECRET: <hidden>')

#### Add the Latitude and Longitude to the Municipality data

#### Same as the Lab

In [11]:
## same as the Lab

# Value sent as the `limit` query parameter of every venue request.
LIMIT = 50


def get_category_type(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        Must provide the category list under the key 'categories' or,
        failing that, under 'venue.categories'.

    Returns
    -------
    The 'name' entry of the first category, or None when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present in `row`.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and is allowed to propagate.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [12]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    """Query the Foursquare venues/explore endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated together with zip(): one label and one coordinate pair per
        location.
    radius : int, optional
        Value sent as the API's `radius` parameter (default 1000).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Municipality',
        'Municipality Latitude', 'Municipality Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty, but keeps these columns, when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and
    prints each location name as a progress indicator.
    """
    columns = ['Municipality',
               'Municipality Latitude',
               'Municipality Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # A timeout keeps a stalled connection from hanging the notebook;
        # raise_for_status surfaces HTTP errors instead of a later KeyError.
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # return only relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without categories yields None instead of an IndexError
                categories[0]['name'] if categories else None))

    # Passing the column names here keeps the result well-formed even with zero rows.
    return pd.DataFrame(rows, columns=columns)

In [13]:
# Preview the city coordinates that are passed to getNearbyVenues.
cov_city.head(10)

Unnamed: 0,Reporting PHU City,Latitude,Longitude
0,Toronto,43.653482,-79.383935
1,London,42.983675,-81.249607
2,Newmarket,44.056258,-79.461708
3,Whitby,43.899871,-78.940201
4,Waterloo,43.466874,-80.524635
5,Mississauga,43.590338,-79.645729
6,Oakville,43.447436,-79.666672
7,Ottawa,45.421106,-75.690308
8,Sudbury,46.49272,-80.991211
9,Hamilton,43.25608,-79.872858


In [15]:
# Fetch the nearby venues of every city (default radius).
cov_city_venues = getNearbyVenues(
    cov_city['Reporting PHU City'],
    cov_city['Latitude'],
    cov_city['Longitude'],
)

Toronto
London
Newmarket
Whitby
Waterloo
Mississauga
Oakville
Ottawa
Sudbury
Hamilton
Cornwall
Barrie
Thorold
Port Hope
Stratford
Kenora
Peterborough
Owen Sound
Guelph
Brantford
Windsor
Kingston
Sault Ste. Marie
Timmins
Chatham
Belleville
St. Thomas
New Liskeard
North Bay
Point Edward
Thunder Bay
Brockville
Simcoe
Pembroke


In [17]:
# Shape (rows = venues, columns) followed by a preview of the fetched venues.
print(cov_city_venues.shape)
cov_city_venues.head()

(1049, 7)


Unnamed: 0,Municipality,Municipality Latitude,Municipality Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Toronto,43.653482,-79.383935,Downtown Toronto,43.653232,-79.385296,Neighborhood
1,Toronto,43.653482,-79.383935,Nathan Phillips Square,43.65227,-79.383516,Plaza
2,Toronto,43.653482,-79.383935,Indigo,43.653515,-79.380696,Bookstore
3,Toronto,43.653482,-79.383935,Eggspectation Bell Trinity Square,43.653144,-79.38198,Breakfast Spot
4,Toronto,43.653482,-79.383935,Chatime 日出茶太,43.655542,-79.384684,Bubble Tea Shop


In [18]:
# Count the venue rows returned for each municipality.
cov_city_venues.groupby('Municipality').count()

Unnamed: 0_level_0,Municipality Latitude,Municipality Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Barrie,46,46,46,46,46,46
Belleville,28,28,28,28,28,28
Brantford,35,35,35,35,35,35
Brockville,18,18,18,18,18,18
Chatham,21,21,21,21,21,21
Cornwall,21,21,21,21,21,21
Guelph,50,50,50,50,50,50
Hamilton,50,50,50,50,50,50
Kenora,12,12,12,12,12,12
Kingston,50,50,50,50,50,50


##### Correcting the latitude and longitude for Pembroke and Simcoe, because too few venues were returned for them

In [19]:
# lat and long by googling manually
# Each list is [latitude, longitude].
pembroke_loc = [45.8267,-77.1109]
simcoe_loc = [42.8373,-80.3040]
#newlisk_loc = [47.5037,-79.6979]

In [20]:
# Index the cities by name so single cities can be addressed with .loc.
temp_cov = cov_city.set_index(['Reporting PHU City'])
# NOTE(review): this head() is not the last expression of the cell, so it displays nothing.
temp_cov.head()
# Coordinates currently stored for the two cities that are corrected next.
temp_cov.loc[['Pembroke','Simcoe']]
#temp_cov.loc[['Pembroke','Simcoe','New Liskeard']]

Unnamed: 0_level_0,Latitude,Longitude
Reporting PHU City,Unnamed: 1_level_1,Unnamed: 2_level_1
Pembroke,45.82554,-77.115065
Simcoe,44.501401,-79.703871


In [21]:
# Overwrite the stored coordinates with the manually looked-up [latitude, longitude] pairs.
for city, (lat, lng) in (('Pembroke', pembroke_loc), ('Simcoe', simcoe_loc)):
    temp_cov.loc[[city], ["Latitude"]] = lat
    temp_cov.loc[[city], ["Longitude"]] = lng
#temp_cov.loc[["New Liskeard"],["Latitude"]] = newlisk_loc[0]
#temp_cov.loc[["New Liskeard"],["Longitude"]] = newlisk_loc[1]

In [22]:
#temp_cov.loc[['Pembroke','Simcoe','New Liskeard']]
# Show the two corrected rows to confirm the new coordinates.
temp_cov.loc[['Pembroke','Simcoe']]

Unnamed: 0_level_0,Latitude,Longitude
Reporting PHU City,Unnamed: 1_level_1,Unnamed: 2_level_1
Pembroke,45.8267,-77.1109
Simcoe,42.8373,-80.304


In [23]:
# Rebind cov_city to the corrected table, with the city name back as a regular column.
cov_city = temp_cov.reset_index()
cov_city.tail()

Unnamed: 0,Reporting PHU City,Latitude,Longitude
29,Point Edward,42.997903,-82.413294
30,Thunder Bay,48.406414,-89.259796
31,Brockville,44.589593,-75.684333
32,Simcoe,42.8373,-80.304
33,Pembroke,45.8267,-77.1109


In [25]:
# Fetch the venues again, now with the corrected coordinates.
cov_city_venues = getNearbyVenues(
    cov_city['Reporting PHU City'],
    cov_city['Latitude'],
    cov_city['Longitude'],
)
print('finished')

Toronto
London
Newmarket
Whitby
Waterloo
Mississauga
Oakville
Ottawa
Sudbury
Hamilton
Cornwall
Barrie
Thorold
Port Hope
Stratford
Kenora
Peterborough
Owen Sound
Guelph
Brantford
Windsor
Kingston
Sault Ste. Marie
Timmins
Chatham
Belleville
St. Thomas
New Liskeard
North Bay
Point Edward
Thunder Bay
Brockville
Simcoe
Pembroke
finished


In [26]:
# Save the fetched venues to disk (the DataFrame index is written too, as to_csv does by default).
cov_city_venues.to_csv('cov_city_venues.csv')

In [27]:
# Shape and preview of the venues fetched with the corrected coordinates.
print(cov_city_venues.shape)
cov_city_venues.head()

(1060, 7)


Unnamed: 0,Municipality,Municipality Latitude,Municipality Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Toronto,43.653482,-79.383935,Downtown Toronto,43.653232,-79.385296,Neighborhood
1,Toronto,43.653482,-79.383935,Nathan Phillips Square,43.65227,-79.383516,Plaza
2,Toronto,43.653482,-79.383935,Indigo,43.653515,-79.380696,Bookstore
3,Toronto,43.653482,-79.383935,Eggspectation Bell Trinity Square,43.653144,-79.38198,Breakfast Spot
4,Toronto,43.653482,-79.383935,Chatime 日出茶太,43.655542,-79.384684,Bubble Tea Shop


In [28]:
# Count the venue rows per municipality after the coordinate correction.
cov_city_venues.groupby('Municipality').count()

Unnamed: 0_level_0,Municipality Latitude,Municipality Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Municipality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Barrie,46,46,46,46,46,46
Belleville,28,28,28,28,28,28
Brantford,35,35,35,35,35,35
Brockville,18,18,18,18,18,18
Chatham,21,21,21,21,21,21
Cornwall,21,21,21,21,21,21
Guelph,50,50,50,50,50,50
Hamilton,50,50,50,50,50,50
Kenora,12,12,12,12,12,12
Kingston,50,50,50,50,50,50


In [29]:
# Number of distinct venue categories across all cities.
n_categories = len(cov_city_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(n_categories))

There are 167 uniques categories.


In [30]:
# one hot encoding
# One indicator column per venue category; the empty prefix leaves the category name as the column name.
cov_city_onehot = pd.get_dummies(cov_city_venues[['Venue Category']], prefix="", prefix_sep="") 
# Put the municipality first, under the name 'Area', so rows can be grouped by it.
cov_city_onehot.insert(0,'Area',cov_city_venues['Municipality'])
cov_city_onehot.head()

Unnamed: 0,Area,ATM,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Auto Garage,Automotive Shop,BBQ Joint,Bakery,Bank,Bar,Beer Garden,Beer Store,Bistro,Bookstore,Border Crossing,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Business Service,Café,Cajun / Creole Restaurant,Canal Lock,Casino,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dog Run,Duty-free Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Food,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden Center,Gas Station,Gastropub,General Entertainment,General Travel,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Hardware Store,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Korean Restaurant,Lake,Laser Tag,Latin American Restaurant,Library,Liquor Store,Lounge,Market,Mediterranean Restaurant,Memorial Site,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motel,Movie Theater,Museum,Music Store,Music Venue,Nail Salon,Neighborhood,New American Restaurant,Newsstand,Nightclub,Noodle House,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plaza,Portuguese Restaurant,Poutine Place,Print Shop,Pub,Ramen Restaurant,Record Shop,Restaurant,Rock Club,Salad Place,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Skating Rink,Smoke Shop,Soup 
Place,Southern / Soul Food Restaurant,Spa,Speakeasy,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Train Station,Tunnel,Vegetarian / Vegan Restaurant,Veterinarian,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Toronto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [31]:
# (number of venue rows, 'Area' + one column per category)
cov_city_onehot.shape

(1060, 168)

In [32]:
# Average the indicator columns per area: each value is the share of that area's venues in the category.
cov_city_grouped = cov_city_onehot.groupby('Area').mean().reset_index()
cov_city_grouped

Unnamed: 0,Area,ATM,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Auto Garage,Automotive Shop,BBQ Joint,Bakery,Bank,Bar,Beer Garden,Beer Store,Bistro,Bookstore,Border Crossing,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Business Service,Café,Cajun / Creole Restaurant,Canal Lock,Casino,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,Comic Shop,Concert Hall,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Dog Run,Duty-free Shop,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Food,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Garden Center,Gas Station,Gastropub,General Entertainment,General Travel,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Hardware Store,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Korean Restaurant,Lake,Laser Tag,Latin American Restaurant,Library,Liquor Store,Lounge,Market,Mediterranean Restaurant,Memorial Site,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motel,Movie Theater,Museum,Music Store,Music Venue,Nail Salon,Neighborhood,New American Restaurant,Newsstand,Nightclub,Noodle House,Other Great Outdoors,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plaza,Portuguese Restaurant,Poutine Place,Print Shop,Pub,Ramen Restaurant,Record Shop,Restaurant,Rock Club,Salad Place,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Skating Rink,Smoke Shop,Soup 
Place,Southern / Soul Food Restaurant,Spa,Speakeasy,Sports Bar,Steakhouse,Supermarket,Sushi Restaurant,Taco Place,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Train Station,Tunnel,Vegetarian / Vegan Restaurant,Veterinarian,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Barrie,0.021739,0.021739,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.043478,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.021739,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.0,0.043478,0.0,0.0,0.021739,0.0,0.065217,0.0,0.0,0.021739,0.0,0.0,0.043478,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.0,0.043478,0.021739,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Belleville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.035714,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.035714,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.035714,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Brantford,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.0,0.085714,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0
3,Brockville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Chatham,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.047619,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.095238,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0
5,Cornwall,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.095238,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Guelph,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.02,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.1,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.04,0.02,0.02,0.0,0.0,0.0,0.0,0.0
7,Hamilton,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.08,0.0,0.02,0.06,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0
8,Kenora,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Kingston,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.04,0.0,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.1,0.0,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
num_top_venues = 5

# For every area, print its `num_top_venues` most frequent venue categories.
for hood in cov_city_grouped['Area']:
    print("----"+hood+"----")
    # Transpose the matching row into (venue, freq) records.
    temp = cov_city_grouped[cov_city_grouped['Area'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # Drop the leading 'Area' record. The explicit copy makes the slice an independent
    # frame, so the column assignment below does not raise SettingWithCopyWarning.
    temp = temp.iloc[1:].copy()
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Barrie----
                           venue  freq
0                    Coffee Shop  0.09
1                            Pub  0.07
2                           Café  0.04
3                 Sandwich Place  0.04
4  Vegetarian / Vegan Restaurant  0.04


----Belleville----
                  venue  freq
0       Harbor / Marina  0.07
1  Gym / Fitness Center  0.04
2     Korean Restaurant  0.04
3           Coffee Shop  0.04
4           Music Store  0.04


----Brantford----
         venue  freq
0  Coffee Shop  0.09
1         Café  0.09
2   Restaurant  0.06
3         Bank  0.06
4  Pizza Place  0.06


----Brockville----
           venue  freq
0       Pharmacy  0.11
1            Pub  0.11
2           Park  0.06
3  Train Station  0.06
4    Coffee Shop  0.06


----Chatham----
               venue  freq
0        Coffee Shop  0.14
1  Convenience Store  0.10
2          Bookstore  0.05
3         Beer Store  0.05
4   Sushi Restaurant  0.05


----Cornwall----
            venue  freq
0   Shopping Mall  0.1

In [34]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first entry of `row` is skipped; the remaining entries are ranked
    in descending order and the index labels of the leading ones are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [35]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Area']
for ind in np.arange(num_top_venues):
    # The first three ranks use their own suffix, every later rank uses 'th'.
    # An explicit bounds check replaces the former bare `except:`.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe
cov_city_venues_sorted = pd.DataFrame(columns=columns)
cov_city_venues_sorted['Area'] = cov_city_grouped['Area']

# Fill each area's row with its most common venue categories, in rank order.
for ind in np.arange(cov_city_grouped.shape[0]):
    cov_city_venues_sorted.iloc[ind, 1:] = return_most_common_venues(cov_city_grouped.iloc[ind, :], num_top_venues)

cov_city_venues_sorted.head()

Unnamed: 0,Area,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Barrie,Coffee Shop,Pub,Sandwich Place,Harbor / Marina,Café
1,Belleville,Harbor / Marina,Gas Station,French Restaurant,Burger Joint,Library
2,Brantford,Café,Coffee Shop,Restaurant,Diner,Fast Food Restaurant
3,Brockville,Pharmacy,Pub,Coffee Shop,Sandwich Place,Fish & Chips Shop
4,Chatham,Coffee Shop,Convenience Store,Beer Store,Theater,Restaurant


In [36]:
# set number of clusters
kclusters = 4

cov_city_grouped_clustering = cov_city_grouped.drop('Area', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(cov_city_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 2, 2, 1, 1, 2, 2, 0, 2])

In [37]:
# add clustering labels
cov_city_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

cov_city_merged = cov_city

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
cov_city_merged = cov_city_merged.join(cov_city_venues_sorted.set_index('Area'), on='Reporting PHU City')

cov_city_merged.head() # check the last columns!

Unnamed: 0,Reporting PHU City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Toronto,43.653482,-79.383935,2,Coffee Shop,Café,Theater,Restaurant,Plaza
1,London,42.983675,-81.249607,2,Indian Restaurant,Bookstore,Thai Restaurant,Italian Restaurant,Diner
2,Newmarket,44.056258,-79.461708,2,Electronics Store,Café,Burger Joint,Bakery,Gastropub
3,Whitby,43.899871,-78.940201,1,Coffee Shop,Bank,Sandwich Place,Pizza Place,Pharmacy
4,Waterloo,43.466874,-80.524635,2,Restaurant,Café,Bar,Coffee Shop,Pizza Place


In [38]:
# Inspect the last rows, which include the two cities with corrected coordinates.
cov_city_merged.tail()

Unnamed: 0,Reporting PHU City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
29,Point Edward,42.997903,-82.413294,2,Food Truck,Ice Cream Shop,Restaurant,Hotel,Border Crossing
30,Thunder Bay,48.406414,-89.259796,1,Restaurant,Coffee Shop,American Restaurant,Hotel,Grocery Store
31,Brockville,44.589593,-75.684333,2,Pharmacy,Pub,Coffee Shop,Sandwich Place,Fish & Chips Shop
32,Simcoe,42.8373,-80.304,0,Fast Food Restaurant,Golf Course,Sandwich Place,Portuguese Restaurant,Supermarket
33,Pembroke,45.8267,-77.1109,3,Ice Cream Shop,Supermarket,Electronics Store,Coffee Shop,Café


In [39]:
# create map
map_on_clusters = folium.Map(location=[cov_city_merged['Latitude'].mean(),cov_city_merged['Longitude'].mean()], zoom_start=6)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(cov_city_merged['Latitude'], cov_city_merged['Longitude'], cov_city_merged['Reporting PHU City'], cov_city_merged['Cluster Labels']):
    # A city with no row in the venue table has a missing label after the join:
    # there is no cluster to draw for it.
    if pd.isna(cluster):
        continue
    # Labels are floats when the column holds missing values; list indexing needs an int.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 keeps the existing colour assignment (cluster 0 wraps around to the last colour).
    folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_on_clusters)

map_on_clusters

In [40]:
# Cities in cluster 0; shows column 0 and columns 3 onward (columns 1 and 2 are left out).
cov_city_merged.loc[
    cov_city_merged['Cluster Labels'].eq(0),
    cov_city_merged.columns[[0, *range(3, cov_city_merged.shape[1])]],
]

Unnamed: 0,Reporting PHU City,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
15,Kenora,0,Fast Food Restaurant,Pharmacy,Supermarket,Harbor / Marina,Grocery Store
23,Timmins,0,Fast Food Restaurant,Coffee Shop,Supermarket,Gas Station,Pharmacy
32,Simcoe,0,Fast Food Restaurant,Golf Course,Sandwich Place,Portuguese Restaurant,Supermarket


In [41]:
# Cities in cluster 1; shows column 0 and columns 3 onward (columns 1 and 2 are left out).
cov_city_merged.loc[
    cov_city_merged['Cluster Labels'].eq(1),
    cov_city_merged.columns[[0, *range(3, cov_city_merged.shape[1])]],
]

Unnamed: 0,Reporting PHU City,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
3,Whitby,1,Coffee Shop,Bank,Sandwich Place,Pizza Place,Pharmacy
10,Cornwall,1,Shopping Mall,Burger Joint,Coffee Shop,Pharmacy,Bakery
13,Port Hope,1,Coffee Shop,Italian Restaurant,Pet Store,Bank,Park
24,Chatham,1,Coffee Shop,Convenience Store,Beer Store,Theater,Restaurant
26,St. Thomas,1,Coffee Shop,Convenience Store,Pizza Place,Museum,Pharmacy
27,New Liskeard,1,Grocery Store,Beer Store,American Restaurant,Hotel,Hardware Store
28,North Bay,1,Restaurant,Gym,Coffee Shop,Sandwich Place,Pizza Place
30,Thunder Bay,1,Restaurant,Coffee Shop,American Restaurant,Hotel,Grocery Store


In [42]:
# Cities in cluster 2; shows column 0 and columns 3 onward (columns 1 and 2 are left out).
cov_city_merged.loc[
    cov_city_merged['Cluster Labels'].eq(2),
    cov_city_merged.columns[[0, *range(3, cov_city_merged.shape[1])]],
]

Unnamed: 0,Reporting PHU City,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Toronto,2,Coffee Shop,Café,Theater,Restaurant,Plaza
1,London,2,Indian Restaurant,Bookstore,Thai Restaurant,Italian Restaurant,Diner
2,Newmarket,2,Electronics Store,Café,Burger Joint,Bakery,Gastropub
4,Waterloo,2,Restaurant,Café,Bar,Coffee Shop,Pizza Place
5,Mississauga,2,Coffee Shop,Clothing Store,Bookstore,Cosmetics Shop,Italian Restaurant
6,Oakville,2,Coffee Shop,Restaurant,Pub,Italian Restaurant,Bakery
7,Ottawa,2,Coffee Shop,Concert Hall,Hotel,Grocery Store,Art Gallery
8,Sudbury,2,Hotel,Café,Bank,Coffee Shop,Sandwich Place
9,Hamilton,2,Café,Pub,Restaurant,Middle Eastern Restaurant,Vietnamese Restaurant
11,Barrie,2,Coffee Shop,Pub,Sandwich Place,Harbor / Marina,Café


In [43]:
# Cities in cluster 3; shows column 0 and columns 3 onward (columns 1 and 2 are left out).
cov_city_merged.loc[
    cov_city_merged['Cluster Labels'].eq(3),
    cov_city_merged.columns[[0, *range(3, cov_city_merged.shape[1])]],
]

Unnamed: 0,Reporting PHU City,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
33,Pembroke,3,Ice Cream Shop,Supermarket,Electronics Store,Coffee Shop,Café


In [44]:
# Disabled on purpose: this would list the members of a cluster labelled 4.
# NOTE(review): the per-cluster summary further down only shows labels 0-3,
# so this selection is presumably empty -- confirm the cluster count before re-enabling.
#cov_city_merged.loc[cov_city_merged['Cluster Labels'] == 4, cov_city_merged.columns[[0] + list(range(3, cov_city_merged.shape[1]))]]

#### Checking the positive cases data

In [45]:
# Number of positive cases per infection source, across all cities.
cov_pos_group = cov_pos_fil.groupby('Infection Source').count()
cov_pos_group.head(n=5)

Unnamed: 0_level_0,Date,Outcome,City
Infection Source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Contact of a confirmed case,2205,2205,2205
Information pending,4929,4929,4929
Neither,3836,3836,3836
Travel-Related,1275,1275,1275


In [46]:
# Preview the clustered city table before it is reshaped for the case analysis.
cov_city_merged.head(n=5)

Unnamed: 0,Reporting PHU City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Toronto,43.653482,-79.383935,2,Coffee Shop,Café,Theater,Restaurant,Plaza
1,London,42.983675,-81.249607,2,Indian Restaurant,Bookstore,Thai Restaurant,Italian Restaurant,Diner
2,Newmarket,44.056258,-79.461708,2,Electronics Store,Café,Burger Joint,Bakery,Gastropub
3,Whitby,43.899871,-78.940201,1,Coffee Shop,Bank,Sandwich Place,Pizza Place,Pharmacy
4,Waterloo,43.466874,-80.524635,2,Restaurant,Café,Bar,Coffee Shop,Pizza Place


In [47]:
# Preview only: the renamed frame is not stored by this cell.
cov_city_merged.rename({'Reporting PHU City': 'City'}, axis='columns').head(n=5)

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Toronto,43.653482,-79.383935,2,Coffee Shop,Café,Theater,Restaurant,Plaza
1,London,42.983675,-81.249607,2,Indian Restaurant,Bookstore,Thai Restaurant,Italian Restaurant,Diner
2,Newmarket,44.056258,-79.461708,2,Electronics Store,Café,Burger Joint,Bakery,Gastropub
3,Whitby,43.899871,-78.940201,1,Coffee Shop,Bank,Sandwich Place,Pizza Place,Pharmacy
4,Waterloo,43.466874,-80.524635,2,Restaurant,Café,Bar,Coffee Shop,Pizza Place


In [48]:
# Use the shorter 'City' header so it matches the 'City' column of cov_pos_fil.
df_cov = cov_city_merged.rename({'Reporting PHU City': 'City'}, axis='columns')

In [49]:
# Optional narrowing to the location/cluster columns, kept disabled so the
# venue columns stay available:
#df_cov = df_cov[['City','Cluster Labels','Latitude','Longitude']]
df_cov.head(n=5)

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Toronto,43.653482,-79.383935,2,Coffee Shop,Café,Theater,Restaurant,Plaza
1,London,42.983675,-81.249607,2,Indian Restaurant,Bookstore,Thai Restaurant,Italian Restaurant,Diner
2,Newmarket,44.056258,-79.461708,2,Electronics Store,Café,Burger Joint,Bakery,Gastropub
3,Whitby,43.899871,-78.940201,1,Coffee Shop,Bank,Sandwich Place,Pizza Place,Pharmacy
4,Waterloo,43.466874,-80.524635,2,Restaurant,Café,Bar,Coffee Shop,Pizza Place


In [50]:
# Preview the filtered per-case table (one row per positive case).
cov_pos_fil.head(n=5)

Unnamed: 0_level_0,Date,Infection Source,Outcome,City
Row_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2020-01-22,Travel-Related,Resolved,Toronto
2,2020-01-21,Travel-Related,Resolved,Toronto
3,2020-01-24,Travel-Related,Resolved,London
4,2020-02-05,Travel-Related,Resolved,Toronto
5,2020-02-16,Travel-Related,Resolved,Toronto


In [51]:
# Number of recorded positive cases per city (non-null count of each column).
cov_pos_fil.groupby('City').count()

Unnamed: 0_level_0,Date,Infection Source,Outcome
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Barrie,209,209,209
Belleville,35,35,35
Brantford,82,82,82
Brockville,279,279,279
Chatham,31,31,31
Cornwall,70,70,70
Guelph,199,199,199
Hamilton,342,342,342
Kenora,13,13,13
Kingston,58,58,58


In [52]:
# Work on an independent copy: the following cells add case-count columns to
# df_cov_merge in place, and a plain assignment would only alias df_cov, so
# those columns would silently appear on df_cov as well.
df_cov_merge = df_cov.copy()

In [53]:
# Display the working table before the per-source case counts are attached.
df_cov_merge

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Toronto,43.653482,-79.383935,2,Coffee Shop,Café,Theater,Restaurant,Plaza
1,London,42.983675,-81.249607,2,Indian Restaurant,Bookstore,Thai Restaurant,Italian Restaurant,Diner
2,Newmarket,44.056258,-79.461708,2,Electronics Store,Café,Burger Joint,Bakery,Gastropub
3,Whitby,43.899871,-78.940201,1,Coffee Shop,Bank,Sandwich Place,Pizza Place,Pharmacy
4,Waterloo,43.466874,-80.524635,2,Restaurant,Café,Bar,Coffee Shop,Pizza Place
5,Mississauga,43.590338,-79.645729,2,Coffee Shop,Clothing Store,Bookstore,Cosmetics Shop,Italian Restaurant
6,Oakville,43.447436,-79.666672,2,Coffee Shop,Restaurant,Pub,Italian Restaurant,Bakery
7,Ottawa,45.421106,-75.690308,2,Coffee Shop,Concert Hall,Hotel,Grocery Store,Art Gallery
8,Sudbury,46.49272,-80.991211,2,Hotel,Café,Bank,Coffee Shop,Sandwich Place
9,Hamilton,43.25608,-79.872858,2,Café,Pub,Restaurant,Middle Eastern Restaurant,Vietnamese Restaurant


In [54]:
# Per-city case counts, one list per infection source. loadCaseTable appends
# one entry to each list per call; resetting them here keeps the cell re-runnable.
sContact, sTravel, sPending, sNeither = [], [], [], []

def loadCaseTable(city):
    """Append one city's case counts per infection source to the module-level lists.

    Filters ``cov_pos_fil`` to the rows whose ``City`` equals ``city``, counts
    them per ``Infection Source`` and appends one number to each of
    ``sContact``, ``sTravel``, ``sPending`` and ``sNeither``. A source that has
    no rows for the city is recorded as 0, so every call grows each list by
    exactly one entry and the lists stay aligned with the caller's city order.

    Parameters
    ----------
    city : str
        Value matched against the ``City`` column of ``cov_pos_fil``.

    Returns
    -------
    None
        Results are appended to the four module-level lists.
    """
    city_cases = cov_pos_fil[cov_pos_fil['City'] == city]
    # Non-null count of the first remaining column, indexed by infection source.
    source_counts = city_cases.groupby('Infection Source').count().iloc[:, 0]
    # Look counts up by label rather than by position: a city may lack any of
    # the four sources (or have no cases at all), and positional access would
    # then attribute counts to the wrong source or raise IndexError.
    sContact.append(source_counts.get('Contact of a confirmed case', 0))
    sTravel.append(source_counts.get('Travel-Related', 0))
    sPending.append(source_counts.get('Information pending', 0))
    sNeither.append(source_counts.get('Neither', 0))

# Visit the cities in table order so the accumulated lists line up row-for-row
# with df_cov_merge when they are attached as columns.
for city in df_cov_merge['City'].tolist():
    loadCaseTable(city)

In [55]:
# Attach the per-source case counts; each list holds one entry per row of
# df_cov_merge, in row order.
df_cov_merge.loc[:, 'Contact'] = sContact
df_cov_merge.loc[:, 'Travel'] = sTravel
df_cov_merge.loc[:, 'Pending'] = sPending
df_cov_merge.loc[:, 'Neither'] = sNeither
df_cov_merge

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Contact,Travel,Pending,Neither
0,Toronto,43.653482,-79.383935,2,Coffee Shop,Café,Theater,Restaurant,Plaza,424,186,2032,942
1,London,42.983675,-81.249607,2,Indian Restaurant,Bookstore,Thai Restaurant,Italian Restaurant,Diner,9,22,312,6
2,Newmarket,44.056258,-79.461708,2,Electronics Store,Café,Burger Joint,Bakery,Gastropub,221,135,360,441
3,Whitby,43.899871,-78.940201,1,Coffee Shop,Bank,Sandwich Place,Pizza Place,Pharmacy,152,42,37,482
4,Waterloo,43.466874,-80.524635,2,Restaurant,Café,Bar,Coffee Shop,Pizza Place,75,39,78,91
5,Mississauga,43.590338,-79.645729,2,Coffee Shop,Clothing Store,Bookstore,Cosmetics Shop,Italian Restaurant,71,65,1339,330
6,Oakville,43.447436,-79.666672,2,Coffee Shop,Restaurant,Pub,Italian Restaurant,Bakery,102,76,38,177
7,Ottawa,45.421106,-75.690308,2,Coffee Shop,Concert Hall,Hotel,Grocery Store,Art Gallery,215,161,143,394
8,Sudbury,46.49272,-80.991211,2,Hotel,Café,Bank,Coffee Shop,Sandwich Place,12,20,5,9
9,Hamilton,43.25608,-79.872858,2,Café,Pub,Restaurant,Middle Eastern Restaurant,Vietnamese Restaurant,158,58,22,104


In [56]:
# Total cases per city = row-wise sum over the four infection-source columns.
# skipna=False keeps the plain-addition semantics (a missing value would propagate).
df_cov_merge['Total'] = df_cov_merge[['Contact', 'Travel', 'Pending', 'Neither']].sum(axis=1, skipna=False)
df_cov_merge.head(n=5)

Unnamed: 0,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Contact,Travel,Pending,Neither,Total
0,Toronto,43.653482,-79.383935,2,Coffee Shop,Café,Theater,Restaurant,Plaza,424,186,2032,942,3584
1,London,42.983675,-81.249607,2,Indian Restaurant,Bookstore,Thai Restaurant,Italian Restaurant,Diner,9,22,312,6,349
2,Newmarket,44.056258,-79.461708,2,Electronics Store,Café,Burger Joint,Bakery,Gastropub,221,135,360,441,1157
3,Whitby,43.899871,-78.940201,1,Coffee Shop,Bank,Sandwich Place,Pizza Place,Pharmacy,152,42,37,482,713
4,Waterloo,43.466874,-80.524635,2,Restaurant,Café,Bar,Coffee Shop,Pizza Place,75,39,78,91,283


In [57]:
# Case totals per cluster. numeric_only=True makes the exclusion of the text
# columns (city, venues) explicit instead of relying on pandas silently
# dropping them, which newer pandas versions no longer do.
# Note: the Latitude/Longitude sums in the result carry no meaning.
df_cov_merge.groupby('Cluster Labels').sum(numeric_only=True)

Unnamed: 0_level_0,Latitude,Longitude,Contact,Travel,Pending,Neither,Total
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,141.080988,-256.123767,60,21,33,115,229
1,360.278989,-643.740707,261,131,56,628,1076
2,976.448999,-1676.181632,1882,1119,4834,3091,10926
3,45.8267,-77.1109,2,4,6,2,14


In [58]:
# Cluster 0 view: city (column 0), top venue (column 4) and the case-count
# columns (position 9 onward).
cluster_0 = df_cov_merge.loc[
    df_cov_merge['Cluster Labels'].eq(0),
    df_cov_merge.columns[[0, 4, *range(9, df_cov_merge.shape[1])]]
]
# Renumbered for display only; cluster_0 itself keeps the original row labels.
cluster_0.reset_index(drop=True)

Unnamed: 0,City,1st Most Common Venue,Contact,Travel,Pending,Neither,Total
0,Kenora,Fast Food Restaurant,1,9,0,3,13
1,Timmins,Fast Food Restaurant,25,6,4,17,52
2,Simcoe,Fast Food Restaurant,34,6,29,95,164


In [59]:
# Mean case load per cluster-0 city: overall, then 'Contact' and 'Neither' combined.
print('Average of total case per City in Cluster 0: %.2f' % (cluster_0['Total'].mean(),))
print('Average of case per City in Cluster 0 caused by contact with confirmed case or unknown cases: %.2f'
      % cluster_0[['Contact', 'Neither']].sum(axis=1, skipna=False).mean())

Average of total case per City in Cluster 0: 76.33
Average of case per City in Cluster 0 caused by contact with confirmed case or unknown cases: 58.33


In [60]:
# Cluster 1 view: city (column 0), top venue (column 4) and the case-count
# columns (position 9 onward).
cluster_1 = df_cov_merge.loc[
    df_cov_merge['Cluster Labels'].eq(1),
    df_cov_merge.columns[[0, 4, *range(9, df_cov_merge.shape[1])]]
]
# Renumbered for display only; cluster_1 itself keeps the original row labels.
cluster_1.reset_index(drop=True)

Unnamed: 0,City,1st Most Common Venue,Contact,Travel,Pending,Neither,Total
0,Whitby,Coffee Shop,152,42,37,482,713
1,Cornwall,Shopping Mall,15,30,1,24,70
2,Port Hope,Coffee Shop,27,25,7,78,137
3,Chatham,Coffee Shop,13,5,1,12,31
4,St. Thomas,Coffee Shop,26,11,3,8,48
5,New Liskeard,Grocery Store,6,2,0,4,12
6,North Bay,Restaurant,4,7,0,3,14
7,Thunder Bay,Restaurant,18,9,7,17,51


In [61]:
# Mean case load per cluster-1 city: overall, then 'Contact' and 'Neither' combined.
print('Average of total case per City in Cluster 1: %.2f' % (cluster_1['Total'].mean(),))
print('Average of case per City in Cluster 1 caused by contact with confirmed case or unknown cases: %.2f'
      % cluster_1[['Contact', 'Neither']].sum(axis=1, skipna=False).mean())

Average of total case per City in Cluster 1: 134.50
Average of case per City in Cluster 1 caused by contact with confirmed case or unknown cases: 111.12


In [62]:
# Cluster 2 view: city (column 0), top venue (column 4) and the case-count
# columns (position 9 onward).
cluster_2 = df_cov_merge.loc[
    df_cov_merge['Cluster Labels'].eq(2),
    df_cov_merge.columns[[0, 4, *range(9, df_cov_merge.shape[1])]]
]
# Renumbered for display only; cluster_2 itself keeps the original row labels.
cluster_2.reset_index(drop=True)

Unnamed: 0,City,1st Most Common Venue,Contact,Travel,Pending,Neither,Total
0,Toronto,Coffee Shop,424,186,2032,942,3584
1,London,Indian Restaurant,9,22,312,6,349
2,Newmarket,Electronics Store,221,135,360,441,1157
3,Waterloo,Restaurant,75,39,78,91,283
4,Mississauga,Coffee Shop,71,65,1339,330,1805
5,Oakville,Coffee Shop,102,76,38,177,393
6,Ottawa,Coffee Shop,215,161,143,394,913
7,Sudbury,Hotel,12,20,5,9,46
8,Hamilton,Café,158,58,22,104,342
9,Barrie,Coffee Shop,59,46,6,98,209


In [63]:
# Mean case load per cluster-2 city: overall, then 'Contact' and 'Neither' combined.
# The first message names cluster 2, matching the data it reports.
print('Average of total case per City in Cluster 2: %.2f' % cluster_2['Total'].mean())
print('Average of case per City in Cluster 2 caused by contact with confirmed case or unknown cases: %.2f' % (cluster_2['Contact']+cluster_2['Neither']).mean())

Average of total case per City in Cluster 0: 496.64
Average of case per City in Cluster 2 caused by contact with confirmed case or unknown cases: 226.05


In [64]:
# Cluster 3 view: city (column 0), top venue (column 4) and the case-count
# columns (position 9 onward).
cluster_3 = df_cov_merge.loc[
    df_cov_merge['Cluster Labels'].eq(3),
    df_cov_merge.columns[[0, 4, *range(9, df_cov_merge.shape[1])]]
]
# Renumbered for display only; cluster_3 itself keeps the original row labels.
cluster_3.reset_index(drop=True)

Unnamed: 0,City,1st Most Common Venue,Contact,Travel,Pending,Neither,Total
0,Pembroke,Ice Cream Shop,2,4,6,2,14


In [65]:
# Mean case load per cluster-3 city: overall, then 'Contact' and 'Neither' combined.
# The first message names cluster 3, matching the data it reports.
print('Average of total case per City in Cluster 3: %.2f' % cluster_3['Total'].mean())
print('Average of case per City in Cluster 3 caused by contact with confirmed case or unknown cases: %.2f' % (cluster_3['Contact']+cluster_3['Neither']).mean())

Average of total case per City in Cluster 0: 14.00
Average of case per City in Cluster 3 caused by contact with confirmed case or unknown cases: 4.00


##### Resulting Table

| Cluster | Number of Cities/Municipalities | Average Total Cases | Average Contact or Neither Cases |
|:-------:|:---------------------:|:-------------:|:----------------------------|
| 0       | 3   | 76.33   | 58.33  |
| 1       | 8  | 134.50  | 111.12 |
| 2       | 22  | 496.64  | 226.05 |
| 3       | 1   | 14.00   | 4.00   |
