# Determining best regions in Melbourne for a specific food category based on food category, ratings, review counts using K-Means clustering

Importing Required Libraries

In [280]:
import pandas as pd 
pd.set_option('display.max_rows', 1000)
import numpy as np 

from geopy import Nominatim

import folium 


print('Libraries import done!')

Libraries import done!


## 1. Data Preparation 

### 1.1 Importing Australian postal code dataset

In [258]:
postal_code_aus_df = pd.read_csv('/Users/chaarvi/Desktop/DS-Capstone Project/Final Project/Dataset/australian_postcodes.csv')
postal_code_aus_df.shape

(18275, 14)

In [259]:
# Filtering the dataset to get postal codes of suburbs only in Melbourne, the state code for Victoria is Vic and region code for Melbourne is R1 
postal_code_Melb_df = postal_code_aus_df[(postal_code_aus_df['state']=='VIC') & (postal_code_aus_df['region']=='R1')]
postal_code_Melb_df.shape

(556, 14)

In [266]:
postal_code_Melb_df.reset_index(inplace=True)

In [267]:
postal_code_Melb_df.head()

Unnamed: 0,level_0,index,id,postcode,locality,state,long,lat,dc,type,status,sa3,sa3name,sa4,sa4name,region
0,0,6103,4746,3000,MELBOURNE,VIC,144.956776,-37.817403,CITY DELIVERY CENTRE,Delivery Area,Updated 6-Feb-2020,20604.0,Melbourne City,206.0,Melbourne - Inner,R1
1,1,6104,4747,3001,MELBOURNE,VIC,144.956776,-37.817403,CITY MAIL PROCESSING CENTRE,Post Office Boxes,Updated 25-Mar-2020 SA3,20605.0,Port Phillip,206.0,Melbourne - Inner,R1
2,2,6105,4748,3002,EAST MELBOURNE,VIC,144.982207,-37.818517,CITY DELIVERY CENTRE,Delivery Area,Updated 6-Feb-2020,20604.0,Melbourne City,206.0,Melbourne - Inner,R1
3,3,6106,4749,3003,WEST MELBOURNE,VIC,144.949592,-37.810871,CITY DELIVERY CENTRE,Delivery Area,Updated 6-Feb-2020,20604.0,Melbourne City,206.0,Melbourne - Inner,R1
4,4,6107,4750,3004,MELBOURNE,VIC,144.970161,-37.844246,CITY DELIVERY CENTRE,Delivery Area,Updated 6-Feb-2020,20605.0,Port Phillip,206.0,Melbourne - Inner,R1


In [268]:
# getting the required columns from the dataset 
required_col = ['postcode', 'locality', 'long','lat']

In [269]:
postal_code_Melb_df = pd.DataFrame(postal_code_Melb_df[required_col])

### 1.2 Data Wrangling:  Melbourne Post Code Data

In [270]:
postal_code_Melb_df.shape

(556, 4)

In [271]:
postal_code_Melb_df

Unnamed: 0,postcode,locality,long,lat
0,3000,MELBOURNE,144.956776,-37.817403
1,3001,MELBOURNE,144.956776,-37.817403
2,3002,EAST MELBOURNE,144.982207,-37.818517
3,3003,WEST MELBOURNE,144.949592,-37.810871
4,3004,MELBOURNE,144.970161,-37.844246
5,3004,ST KILDA ROAD CENTRAL,144.970161,-37.844246
6,3004,ST KILDA ROAD MELBOURNE,0.0,0.0
7,3005,WORLD TRADE CENTRE,144.950858,-37.824608
8,3006,SOUTH WHARF,144.952074,-37.825287
9,3006,SOUTHBANK,144.965926,-37.823258


In [272]:
# Rows with missing coordinates
long_lat_0 = postal_code_Melb_df[postal_code_Melb_df['long']==0]
long_lat_0

Unnamed: 0,postcode,locality,long,lat
6,3004,ST KILDA ROAD MELBOURNE,0.0,0.0
107,3042,NIDDRIE NORTH,0.0,0.0
232,3103,STRADBROKE PARK,0.0,0.0
403,3176,SCORESBY BC,0.0,0.0


In [248]:
# defining functions to get geo-coordinates for missing coordinates, we would use geopy to get the missing coordinates
def get_lat(address):
    locator = Nominatim(user_agent='myGeocoder')
    location = locator.geocode(address)
    print(address)
    print(location.latitude)
    
    return location.latitude

def get_long(address):
    locator = Nominatim(user_agent='myGeocoder')
    location = locator.geocode(address)
    print(location.longitude)
    return location.longitude
    

In [273]:
postal_code_Melb_df['lat'] = postal_code_Melb_df.apply(lambda x:get_lat(x.locality + " Victoria Australia ") if x.lat ==0 else x.lat,axis=1)
postal_code_Melb_df['long'] = postal_code_Melb_df.apply(lambda x:get_long(x.locality +  " Victoria Australia") if x.long ==0 else x.long,axis=1)
postal_code_Melb_df

ST KILDA ROAD MELBOURNE Victoria Australia 
-37.8358215
NIDDRIE NORTH Victoria Australia 
-37.73718965
STRADBROKE PARK Victoria Australia 
-37.797699300000005
SCORESBY BC Victoria Australia 
-37.9025786
144.9763639
144.86462905846437
145.0612343522372
145.2183066


Unnamed: 0,postcode,locality,long,lat
0,3000,MELBOURNE,144.956776,-37.817403
1,3001,MELBOURNE,144.956776,-37.817403
2,3002,EAST MELBOURNE,144.982207,-37.818517
3,3003,WEST MELBOURNE,144.949592,-37.810871
4,3004,MELBOURNE,144.970161,-37.844246
5,3004,ST KILDA ROAD CENTRAL,144.970161,-37.844246
6,3004,ST KILDA ROAD MELBOURNE,144.976364,-37.835822
7,3005,WORLD TRADE CENTRE,144.950858,-37.824608
8,3006,SOUTH WHARF,144.952074,-37.825287
9,3006,SOUTHBANK,144.965926,-37.823258


In [274]:
# checking the number of unique postal codes, longitudes, latitudes 
print(f"There are {len(postal_code_Melb_df['locality'].unique())} localities value in Melbourne")
print(f"There are {len(postal_code_Melb_df['postcode'].unique())} postcodes in Melbourne")
print(f"There are {len(postal_code_Melb_df['long'].unique())} longitudes value in Melbourne")
print(f"There are {len(postal_code_Melb_df['lat'].unique())} latitudes value in Melbourne")


There are 533 localities value in Melbourne
There are 223 postcodes in Melbourne
There are 233 longitudes value in Melbourne
There are 233 latitudes value in Melbourne


In [275]:
locality_count= postal_code_Melb_df['locality'].value_counts()
locality_count

MELBOURNE                   15
PLUMPTON                     3
DOCKLANDS                    2
LA TROBE UNIVERSITY          2
WORLD TRADE CENTRE           2
ST KILDA ROAD CENTRAL        2
EAST MELBOURNE               2
DANDENONG SOUTH              2
DANDENONG                    2
SAINT HELENA                 1
KEILOR EAST                  1
MERLYNSTON                   1
ST KILDA SOUTH               1
OAKLANDS JUNCTION            1
KINGSVILLE WEST              1
MAIDSTONE                    1
MITCHAM                      1
SURREY HILLS SOUTH           1
SELBY                        1
ROCKBANK                     1
SPOTSWOOD                    1
UPPER FERNTREE GULLY         1
OAKLEIGH                     1
KINGSBURY                    1
VERMONT SOUTH                1
MURRUMBEENA                  1
CARLTON NORTH                1
WILLIAMSTOWN NORTH           1
CHADSTONE CENTRE             1
FOUNTAIN GATE                1
COCOROC                      1
BALWYN EAST                  1
DOVETON 

The following can be observed from the following observation 

1. There are multiple localities within a particular postcode, which is a likely scenario. <br>
<br>
2. Since there are multiple localities within a postcode, it is likely that a post code will correspond to more than one geo-coordinate



In [277]:
# grouping localities based on longitudes and latitudes, to get single row for each pair of coordinates
melb_post_code_grp= postal_code_Melb_df.groupby(['long', 'lat'])['locality'].apply(list).reset_index()
melb_post_code_grp

Unnamed: 0,long,lat,locality
0,144.546242,-37.65724,[HARKNESS]
1,144.556552,-37.899873,"[MAMBOURIN, MOUNT COTTRELL, WYNDHAM VALE]"
2,144.568337,-37.728705,[WEIR VIEWS]
3,144.571033,-37.705529,"[BROOKFIELD, EXFORD, EYNESBURY, MELTON SOUTH]"
4,144.571458,-37.656057,"[KURUNJANG, MELTON, MELTON WEST, TOOLERN VALE]"
5,144.581232,-37.866942,[MANOR LAKES]
6,144.596909,-37.73399,[STRATHTULLOH]
7,144.600855,-37.70732,[COBBLEBANK]
8,144.624896,-37.730292,[THORNHILL PARK]
9,144.63933,-37.7069,[GRANGEFIELDS]


### 1.3 Getting Nearby Venues for each pair of coordinates using the Fourquare API

In [281]:
address = 'Melbourne, AU'

geolocator = Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Melbourne are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Melbourne are -37.8142176, 144.9631608.


In [285]:
# create map of Toronto using latitude and longitude values
map_Melbourne = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(melb_post_code_grp['lat'], melb_post_code_grp['long'], melb_post_code_grp['locality']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Melbourne) 

map_Melbourne