# Downloading the dependencies

In [5]:
# for web scraping
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import platform

try:
    import scrapy
except:
    !conda install -c conda-forge scrapy --yes
    import scrapy
    
from scrapy.crawler import CrawlerProcess
import scrapy.crawler as crawler
from multiprocessing import Process, Queue
from twisted.internet import reactor

# for downloading file to notebook
import os

# library to handle data in a vectorized manner
import numpy as np

# library for data analsysis
import pandas as pd

# for working on json
import json

# convert an address into latitude and longitude values
try:
    from geopy.geocoders import Nominatim
except:
    !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
    from geopy.geocoders import Nominatim

# library to handle requests
import requests

# tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# map rendering library
!conda install -c conda-forge folium=0.5.0 --yes
import folium

#importing sys
import sys

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - scrapy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    automat-20.2.0             |             py_0          30 KB  conda-forge
    bcrypt-3.1.7               |   py36h8c4c3a4_1          43 KB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    constantly-15.1.0          |             py_0           9 KB  conda-forge
    cryptography-2.9.2         |   py36h45558ae_0         613 KB  conda-forge
    cssselect-1.1.0            |             py_0          18 KB  conda-forge
    hyperlink-19.0.0           |     pyh9f0ad1d_0          35 KB  conda-forge
    idna-2.10              

# Download and Explore Dataset

There are around 400+ suburbs around Melbourne. There is no easy source available online, so I am "Web Scraping" the list of suburbs from wikipedia. The list is spread in following two pages which can be obtained by page scraping -  
Source – 
a) https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Melbourne&pageuntil=Keilor%2C+Victoria%0AKeilor%2C+Victoria#mw-pages

b) https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Melbourne&pagefrom=Keilor%2C+Victoria%0AKeilor%2C+Victoria#mw-pages

This information has to be refined to be used later on.


## Web Scraping using Scrapy

Scrapy is a fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages. It can be used for a wide range of purposes, from data mining to monitoring and automated testing.

You can get more information from - https://docs.scrapy.org/en/latest/

This class creates a simple pipeline that writes all found items to a JSON file, where each line contains one JSON element.

In [6]:
class JsonWriterPipeline(object):

    def open_spider(self, spider):
        self.file = open('Mel_Subs.jl', 'w')

    def close_spider(self, spider):
        self.file.close()

    def process_item(self, item, spider):
        line = json.dumps(dict(item)) + "\n"
        self.file.write(line)
        return item

## Define the spider
The QuotesSpider class defines from which URLs to start crawling and which values to retrieve. I set the logging level of the crawler to warning, otherwise the notebook is overloaded with DEBUG messages about the retrieved data.

In [7]:
import logging

class QuotesSpider(scrapy.Spider):
    name = "quotes"
    start_urls = [
        'https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Melbourne&pageuntil=Keilor%2C+Victoria%0AKeilor%2C+Victoria#mw-pages',
        'https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Melbourne&pagefrom=Keilor%2C+Victoria%0AKeilor%2C+Victoria#mw-pages',
    ]
    
    # Delete the JSON / JL file if exists - useful when run multiple times
    filePath='Mel_Subs.json'
    if os.path.exists(filePath):
        os.remove(filePath)
        print('Deleted "Mel_Subs.json"')
    else:
        print("Can not delete the file as it doesn't exists")
        
    filePath='Mel_Subs.jl'
    if os.path.exists(filePath):
        os.remove(filePath)
        print('Deleted "Mel_Subs.jl"')
    else:
        print("Can not delete the file as it doesn't exists")
    
    custom_settings = {
        'LOG_LEVEL': logging.WARNING,
        'ITEM_PIPELINES': {'__main__.JsonWriterPipeline': 1}, # Used for pipeline 1
        'FEED_FORMAT':'json',                                 # Used for pipeline 2
        'FEED_URI': 'Mel_Subs.json'                        # Used for pipeline 2
    }
    
    def parse(self, response):
        #for quote in response.css('div.quote'):
        for subs in response.xpath("//div[@id='mw-pages']//div//ul[1]//li"):
            yield {
                #'text': quote.css('span.text::text').extract_first(),
                'name': subs.css('li a::text').extract_first(),
            }

Deleted "Mel_Subs.json"
Deleted "Mel_Subs.jl"


In [8]:
# the wrapper to make it run more times
def run_spider(spider):
    def f(q):
        try:
            runner = crawler.CrawlerRunner()
            #process = CrawlerProcess({
            #    'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
            #})
            deferred = runner.crawl(spider)
            #deferred = process.crawl(spider)
            deferred.addBoth(lambda _: reactor.stop())
            reactor.run()
            q.put(None)
        except Exception as e:
            q.put(e)

    q = Queue()
    p = Process(target=f, args=(q,))
    p.start()
    result = q.get()
    p.join()

    if result is not None:
        raise result

## Start the crawler

In [9]:
run_spider(QuotesSpider)

  exporter = cls(crawler)


## Check the files
Verify that the files has been created on disk. As we can observe the files are both created and have data. The .jl file has line separated JSON elements, while the .json file has one big JSON array containing all the quotes.

In [10]:
ll Mel_Subs.*

-rw-r--r-- 1 jupyterlab 13214 Jul  2 20:17 Mel_Subs.jl
-rw-r--r-- 1 jupyterlab 13613 Jul  2 20:17 Mel_Subs.json


In [11]:
!tail -n 2 Mel_Subs.jl

{"name": "Yallambie, Victoria"}
{"name": "Yarraville, Victoria"}


In [12]:
!tail -n 2 Mel_Subs.json

{"name": "Yarraville, Victoria"}
]

## Create dataframes
Pandas can now be used to create dataframes and save the frames to pickles. The .sjon file can be loaded directly into a frame, whereas for the .jl file we need to specify the JSON objects are divided per line. 

NOTE - There is an issue in loading .json file to dataframe as it has multiple json objects in unexpected format.

In [13]:
#dfjson = pd.read_json('Mel_Subs.json', lines=True)
#dfjson


Hence .jl file is used to load the dataframe which has the web scraped list, one in each line

In [14]:

dfjl= pd.read_json('Mel_Subs.jl', lines=True)
dfjl.columns=['suburb']
dfjl

Unnamed: 0,suburb
0,List of Melbourne suburbs
1,"Abbotsford, Victoria"
2,"Aberfeldie, Victoria"
3,"Aintree, Victoria"
4,"Airport West, Victoria"
...,...
392,"Windsor, Victoria"
393,"Wollert, Victoria"
394,Wyndham Vale
395,"Yallambie, Victoria"


# Data Manuipulation for the ease of using

dfjl has a row header = "List of Melbourne suburbs" as one of the value, which must be deleted / dropped

In [15]:
#Searching for the row in the dataframe
dfjl[dfjl['suburb'].str.contains("List")]

Unnamed: 0,suburb
0,List of Melbourne suburbs


In [16]:
#Searching for the row in the dataframe
delete_row = dfjl[dfjl['suburb'].str.contains("List")].index

#Deleting the the row and resetting index
df_subs = dfjl.drop(delete_row).reset_index(drop=True)
df_subs

Unnamed: 0,suburb
0,"Abbotsford, Victoria"
1,"Aberfeldie, Victoria"
2,"Aintree, Victoria"
3,"Airport West, Victoria"
4,"Albanvale, Victoria"
...,...
391,"Windsor, Victoria"
392,"Wollert, Victoria"
393,Wyndham Vale
394,"Yallambie, Victoria"


There need not be "Victoria", in every row, which can be removed

In [17]:
# dropping null value columns to avoid errors 
df_subs.dropna(inplace = True)

# new data frame with split value columns 
new = df_subs['suburb'].str.split(",", n = 1, expand = True)
df_subs['suburb'] = new[0]

# dropping null value columns to avoid errors 
df_subs.dropna(inplace = True)
df_subs

Unnamed: 0,suburb
0,Abbotsford
1,Aberfeldie
2,Aintree
3,Airport West
4,Albanvale
...,...
391,Windsor
392,Wollert
393,Wyndham Vale
394,Yallambie


# Logitude & Latitudes for Mapping

We require the information of latitude and logitide values of all the suburbs to map them effectively. This information can be collected using - geocoder. But since it is unstable most of the times. It can be downloaded from the available csv file which has all the Australia Suburbs Information - Postal Code,State, Name of Suburb, Latitude, Longitude.

In [18]:
df_aus = pd.read_csv('Aus-Postcodes.csv',skipinitialspace=True)
df_aus

Unnamed: 0,postcode,suburb,state,latitude,longitude
0,200,Australian National University,ACT,-35.28,149.12
1,221,Barton,ACT,-35.20,149.1
2,800,Darwin,NT,-12.80,130.96
3,801,Darwin,NT,-12.80,130.96
4,804,Parap,NT,-12.43,130.84
...,...,...,...,...,...
16737,9023,Brisbane GPO Boxes,QLD,0.00,0
16738,9464,Northgate MC,QLD,0.00,0
16739,9726,Gold Coast MC,QLD,0.00,0
16740,9728,Gold Coast MC,QLD,0.00,0


We only require the State of Victoria's details. Its state code is 'VIC'. Hence I am going to filter out the 'VIC' state's data from the available large list of Australian Suburbs. Even the reset of index is done.

In [19]:
df_vic = df_aus[df_aus['state']=='VIC'].reset_index(drop=True)
df_vic

Unnamed: 0,postcode,suburb,state,latitude,longitude
0,3000,Melbourne,VIC,-37.81,144.97
1,3001,Melbourne,VIC,-38.37,144.77
2,3002,East Melbourne,VIC,-37.82,144.99
3,3003,West Melbourne,VIC,-37.81,144.94
4,3004,Melbourne,VIC,-37.84,144.98
...,...,...,...,...,...
3228,8396,Melbourne,VIC,-38.37,144.77
3229,8399,Melbourne,VIC,-38.37,144.77
3230,8576,Ivanhoe,VIC,-37.76,145.04
3231,8627,Camberwell,VIC,-37.84,145.06


We are not going to use postcode and state information neither in calculation nor in mapping. Hence we can drop those columns.

In [20]:
columns = ['suburb','latitude','longitude']
df_vic = df_vic[columns]
df_vic

Unnamed: 0,suburb,latitude,longitude
0,Melbourne,-37.81,144.97
1,Melbourne,-38.37,144.77
2,East Melbourne,-37.82,144.99
3,West Melbourne,-37.81,144.94
4,Melbourne,-37.84,144.98
...,...,...,...
3228,Melbourne,-38.37,144.77
3229,Melbourne,-38.37,144.77
3230,Ivanhoe,-37.76,145.04
3231,Camberwell,-37.84,145.06


If we observe the rows in df_vic clearly, we can find multiple fields for "Melbourne" suburb. which is not required. And if if those records are not dropped would distort the map.

In [21]:
#df_vic[df_vic['suburb']=='Melbourne']

In [22]:
#Dropping all the other rows with suburb as "Melbourne" other than at 0th index with latitide = '-37.81' and resetting the index
#df_vic = df_vic.drop(df_vic[(df_vic['suburb']=='Melbourne') & (df_vic['latitude']!=-37.81)].index).reset_index(drop=True)
#df_vic

In [23]:
# saving this to use later-on
df_temp = df_vic[(df_vic['suburb']=='Melbourne') & (df_vic['latitude']==-37.81)]
df_temp

Unnamed: 0,suburb,latitude,longitude
0,Melbourne,-37.81,144.97


Now that we have two data frames - 1. List of Melbourne Suburbs[df_subs] & 2. List of all suburbs in Victoria State with latitide and logitude data[df_vic]. We can merge both the dataframes and get the required information of MELBOURNE SUBURBS.

In [24]:
#Merge the df_subs with df_vic on 'suburb'
df_Mel_Subs = pd.merge(df_subs,df_vic,on=['suburb'])
df_Mel_Subs.dropna(inplace = True)
df_Mel_Subs

Unnamed: 0,suburb,latitude,longitude
0,Abbotsford,-37.80,145
1,Aberfeldie,-37.76,144.9
2,Airport West,-37.71,144.89
3,Albanvale,-37.75,144.77
4,Albert Park,-37.84,144.96
...,...,...,...
374,Windsor,-37.85,144.99
375,Wollert,-38.38,144.81
376,Wyndham Vale,-37.90,144.56
377,Yallambie,-37.73,145.07


There might be few suburbs whose latitude and longitude information is populated as 0.0 or 0. This can be fetched by

In [25]:
df_Mel_Subs[(df_Mel_Subs['latitude']==0) | (df_Mel_Subs['longitude']==0)]

Unnamed: 0,suburb,latitude,longitude
211,Docklands,0.0,0


This row can be dropped

In [26]:
df_Mel_Subs = df_Mel_Subs.drop(df_Mel_Subs[(df_Mel_Subs['latitude']==0) | (df_Mel_Subs['longitude']==0)].index).reset_index(drop=True)
df_Mel_Subs

Unnamed: 0,suburb,latitude,longitude
0,Abbotsford,-37.80,145
1,Aberfeldie,-37.76,144.9
2,Airport West,-37.71,144.89
3,Albanvale,-37.75,144.77
4,Albert Park,-37.84,144.96
...,...,...,...
373,Windsor,-37.85,144.99
374,Wollert,-38.38,144.81
375,Wyndham Vale,-37.90,144.56
376,Yallambie,-37.73,145.07


#### By carefully oberving the list I found out that this particular dataframe does not contain - "Melbourne CBD" / "Melbourne" or normally called as CBD. We can add it manually.

In [27]:
df_temp

Unnamed: 0,suburb,latitude,longitude
0,Melbourne,-37.81,144.97


In [28]:
df_Mel_Subs = df_Mel_Subs.append(df_temp, ignore_index = True)
df_Mel_Subs

Unnamed: 0,suburb,latitude,longitude
0,Abbotsford,-37.80,145
1,Aberfeldie,-37.76,144.9
2,Airport West,-37.71,144.89
3,Albanvale,-37.75,144.77
4,Albert Park,-37.84,144.96
...,...,...,...
374,Wollert,-38.38,144.81
375,Wyndham Vale,-37.90,144.56
376,Yallambie,-37.73,145.07
377,Yarraville,-37.82,144.89


In [29]:
df_Mel_Subs.dtypes

suburb        object
latitude     float64
longitude     object
dtype: object

In [30]:
df_Mel_Subs['longitude'] = df_Mel_Subs['longitude'].astype(float)
df_Mel_Subs['latitude'] = df_Mel_Subs['latitude'].astype(float)
df_Mel_Subs.dtypes

suburb        object
latitude     float64
longitude    float64
dtype: object

### Use geopy library to get the latitude and longitude values of Melbourne City.

In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent <em>ny_explorer</em>, as shown below.

In [31]:
address = 'Melbourne, Australia'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Melbourne, Australia are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Melbourne, Australia are -37.8142176, 144.9631608.


### Create a map of Melbourne with its suburbs superimposed on top.

In [32]:
# create map of Melbourne using latitude and longitude values
map_melb = folium.Map(location=[latitude, longitude], zoom_start=9)

# add markers to map
for lat, lng, suburb in zip(df_Mel_Subs['latitude'], df_Mel_Subs['longitude'], df_Mel_Subs['suburb']):
    label = '{}, {}'.format(suburb,'VIC')
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_melb)

<folium.features.CircleMarker at 0x7f9802ab2da0>

<folium.features.CircleMarker at 0x7f98029f8b38>

<folium.features.CircleMarker at 0x7f98029f8a20>

<folium.features.CircleMarker at 0x7f9802ad68d0>

<folium.features.CircleMarker at 0x7f98029f89e8>

<folium.features.CircleMarker at 0x7f98029f8080>

<folium.features.CircleMarker at 0x7f98029f8c50>

<folium.features.CircleMarker at 0x7f9802ad66a0>

<folium.features.CircleMarker at 0x7f9802ad6ac8>

<folium.features.CircleMarker at 0x7f9802ad6780>

<folium.features.CircleMarker at 0x7f9802ad6550>

<folium.features.CircleMarker at 0x7f9802ad67f0>

<folium.features.CircleMarker at 0x7f9802ad69b0>

<folium.features.CircleMarker at 0x7f9802a43c88>

<folium.features.CircleMarker at 0x7f9802a5a400>

<folium.features.CircleMarker at 0x7f9802a5ac18>

<folium.features.CircleMarker at 0x7f9802ad6cc0>

<folium.features.CircleMarker at 0x7f9802a5a2b0>

<folium.features.CircleMarker at 0x7f9802a5a128>

<folium.features.CircleMarker at 0x7f9802a5a208>

<folium.features.CircleMarker at 0x7f9802a43860>

<folium.features.CircleMarker at 0x7f9802a43b70>

<folium.features.CircleMarker at 0x7f9802a5ac88>

<folium.features.CircleMarker at 0x7f9802a5add8>

<folium.features.CircleMarker at 0x7f9802a5a7b8>

<folium.features.CircleMarker at 0x7f9802a62588>

<folium.features.CircleMarker at 0x7f9802a439b0>

<folium.features.CircleMarker at 0x7f9802a62f60>

<folium.features.CircleMarker at 0x7f9802a62da0>

<folium.features.CircleMarker at 0x7f9802a628d0>

<folium.features.CircleMarker at 0x7f9802a62518>

<folium.features.CircleMarker at 0x7f9802a0e390>

<folium.features.CircleMarker at 0x7f9802a62550>

<folium.features.CircleMarker at 0x7f9802a0ef28>

<folium.features.CircleMarker at 0x7f9802a0eda0>

<folium.features.CircleMarker at 0x7f9802a0ef60>

<folium.features.CircleMarker at 0x7f9802a0e8d0>

<folium.features.CircleMarker at 0x7f98029fa588>

<folium.features.CircleMarker at 0x7f98029fa9b0>

<folium.features.CircleMarker at 0x7f9802a62c88>

<folium.features.CircleMarker at 0x7f9802a0e5f8>

<folium.features.CircleMarker at 0x7f98029fa390>

<folium.features.CircleMarker at 0x7f98029fab38>

<folium.features.CircleMarker at 0x7f9802a0e2e8>

<folium.features.CircleMarker at 0x7f9802a034a8>

<folium.features.CircleMarker at 0x7f9802a03ba8>

<folium.features.CircleMarker at 0x7f98029fa978>

<folium.features.CircleMarker at 0x7f9802a03d68>

<folium.features.CircleMarker at 0x7f98029fc1d0>

<folium.features.CircleMarker at 0x7f9802a0ed30>

<folium.features.CircleMarker at 0x7f9802a03048>

<folium.features.CircleMarker at 0x7f98029fc198>

<folium.features.CircleMarker at 0x7f98029fc358>

<folium.features.CircleMarker at 0x7f98029fc2b0>

<folium.features.CircleMarker at 0x7f98029c0898>

<folium.features.CircleMarker at 0x7f9802a034e0>

<folium.features.CircleMarker at 0x7f98029c0588>

<folium.features.CircleMarker at 0x7f98029c0748>

<folium.features.CircleMarker at 0x7f98029c07b8>

<folium.features.CircleMarker at 0x7f98029c0128>

<folium.features.CircleMarker at 0x7f98029eeeb8>

<folium.features.CircleMarker at 0x7f98029fc9e8>

<folium.features.CircleMarker at 0x7f98029c0a20>

<folium.features.CircleMarker at 0x7f98029ee898>

<folium.features.CircleMarker at 0x7f98029c1668>

<folium.features.CircleMarker at 0x7f98029ee7f0>

<folium.features.CircleMarker at 0x7f98029c1550>

<folium.features.CircleMarker at 0x7f98029c0f98>

<folium.features.CircleMarker at 0x7f98029c1828>

<folium.features.CircleMarker at 0x7f98029c1128>

<folium.features.CircleMarker at 0x7f98029c1b00>

<folium.features.CircleMarker at 0x7f98029c1048>

<folium.features.CircleMarker at 0x7f98029a2908>

<folium.features.CircleMarker at 0x7f98029c1470>

<folium.features.CircleMarker at 0x7f98029c1f28>

<folium.features.CircleMarker at 0x7f98029a2d30>

<folium.features.CircleMarker at 0x7f98029a2320>

<folium.features.CircleMarker at 0x7f98029a2d68>

<folium.features.CircleMarker at 0x7f98029c1dd8>

<folium.features.CircleMarker at 0x7f98029a27b8>

<folium.features.CircleMarker at 0x7f98029a2438>

<folium.features.CircleMarker at 0x7f9802981dd8>

<folium.features.CircleMarker at 0x7f9802981a58>

<folium.features.CircleMarker at 0x7f9802981c50>

<folium.features.CircleMarker at 0x7f98029a8470>

<folium.features.CircleMarker at 0x7f980295a898>

<folium.features.CircleMarker at 0x7f980295af60>

<folium.features.CircleMarker at 0x7f980295ac18>

<folium.features.CircleMarker at 0x7f9802981438>

<folium.features.CircleMarker at 0x7f9802981d30>

<folium.features.CircleMarker at 0x7f98029a8c18>

<folium.features.CircleMarker at 0x7f980295aa58>

<folium.features.CircleMarker at 0x7f980293e2e8>

<folium.features.CircleMarker at 0x7f98029eea20>

<folium.features.CircleMarker at 0x7f98029a8160>

<folium.features.CircleMarker at 0x7f980295a5c0>

<folium.features.CircleMarker at 0x7f980295a400>

<folium.features.CircleMarker at 0x7f98029476a0>

<folium.features.CircleMarker at 0x7f9802947978>

<folium.features.CircleMarker at 0x7f9802947278>

<folium.features.CircleMarker at 0x7f9802947358>

<folium.features.CircleMarker at 0x7f9802947a20>

<folium.features.CircleMarker at 0x7f9802947eb8>

<folium.features.CircleMarker at 0x7f980293e4e0>

<folium.features.CircleMarker at 0x7f980293ee10>

<folium.features.CircleMarker at 0x7f980296a550>

<folium.features.CircleMarker at 0x7f980296ac50>

<folium.features.CircleMarker at 0x7f980295aa20>

<folium.features.CircleMarker at 0x7f980296a748>

<folium.features.CircleMarker at 0x7f980296ad68>

<folium.features.CircleMarker at 0x7f980293eac8>

<folium.features.CircleMarker at 0x7f980293edd8>

<folium.features.CircleMarker at 0x7f980293eda0>

<folium.features.CircleMarker at 0x7f9802947dd8>

<folium.features.CircleMarker at 0x7f98029a8828>

<folium.features.CircleMarker at 0x7f9802916a58>

<folium.features.CircleMarker at 0x7f9802916748>

<folium.features.CircleMarker at 0x7f9802916c88>

<folium.features.CircleMarker at 0x7f9802916080>

<folium.features.CircleMarker at 0x7f9802928a20>

<folium.features.CircleMarker at 0x7f9802928e48>

<folium.features.CircleMarker at 0x7f980296af28>

<folium.features.CircleMarker at 0x7f98029287f0>

<folium.features.CircleMarker at 0x7f98029287b8>

<folium.features.CircleMarker at 0x7f9802916128>

<folium.features.CircleMarker at 0x7f9802928358>

<folium.features.CircleMarker at 0x7f98028ff3c8>

<folium.features.CircleMarker at 0x7f980293e780>

<folium.features.CircleMarker at 0x7f98028ff550>

<folium.features.CircleMarker at 0x7f98028ff278>

<folium.features.CircleMarker at 0x7f98029286d8>

<folium.features.CircleMarker at 0x7f98028fff28>

<folium.features.CircleMarker at 0x7f9802928e80>

<folium.features.CircleMarker at 0x7f9802930b00>

<folium.features.CircleMarker at 0x7f98028c07f0>

<folium.features.CircleMarker at 0x7f98028ffe48>

<folium.features.CircleMarker at 0x7f98028c0f28>

<folium.features.CircleMarker at 0x7f98028c0080>

<folium.features.CircleMarker at 0x7f98028c0c50>

<folium.features.CircleMarker at 0x7f98028c0f60>

<folium.features.CircleMarker at 0x7f9802930cf8>

<folium.features.CircleMarker at 0x7f98028ee588>

<folium.features.CircleMarker at 0x7f98028c0780>

<folium.features.CircleMarker at 0x7f98028c0908>

<folium.features.CircleMarker at 0x7f98029305f8>

<folium.features.CircleMarker at 0x7f9802930ba8>

<folium.features.CircleMarker at 0x7f98028eeb70>

<folium.features.CircleMarker at 0x7f98028ee5f8>

<folium.features.CircleMarker at 0x7f98028d89b0>

<folium.features.CircleMarker at 0x7f98028ff208>

<folium.features.CircleMarker at 0x7f98028eea20>

<folium.features.CircleMarker at 0x7f98028ee2b0>

<folium.features.CircleMarker at 0x7f98028d8470>

<folium.features.CircleMarker at 0x7f98028d8e80>

<folium.features.CircleMarker at 0x7f98028d8940>

<folium.features.CircleMarker at 0x7f98028e9f28>

<folium.features.CircleMarker at 0x7f98028eef98>

<folium.features.CircleMarker at 0x7f98028d8908>

<folium.features.CircleMarker at 0x7f98028e9240>

<folium.features.CircleMarker at 0x7f98028e90f0>

<folium.features.CircleMarker at 0x7f98028e9e80>

<folium.features.CircleMarker at 0x7f980287ec88>

<folium.features.CircleMarker at 0x7f980288dc88>

<folium.features.CircleMarker at 0x7f980287e940>

<folium.features.CircleMarker at 0x7f98028d8d68>

<folium.features.CircleMarker at 0x7f980287e240>

<folium.features.CircleMarker at 0x7f98028e9e10>

<folium.features.CircleMarker at 0x7f98028e92b0>

<folium.features.CircleMarker at 0x7f98028e9940>

<folium.features.CircleMarker at 0x7f98028d8b70>

<folium.features.CircleMarker at 0x7f980287e780>

<folium.features.CircleMarker at 0x7f9802890b00>

<folium.features.CircleMarker at 0x7f9802890278>

<folium.features.CircleMarker at 0x7f98028901d0>

<folium.features.CircleMarker at 0x7f98028904e0>

<folium.features.CircleMarker at 0x7f980288df60>

<folium.features.CircleMarker at 0x7f980288f860>

<folium.features.CircleMarker at 0x7f980287e9e8>

<folium.features.CircleMarker at 0x7f98028905c0>

<folium.features.CircleMarker at 0x7f9802890ba8>

<folium.features.CircleMarker at 0x7f9802890828>

<folium.features.CircleMarker at 0x7f98028909e8>

<folium.features.CircleMarker at 0x7f980288f2b0>

<folium.features.CircleMarker at 0x7f9802843a20>

<folium.features.CircleMarker at 0x7f9802890f60>

<folium.features.CircleMarker at 0x7f980288f8d0>

<folium.features.CircleMarker at 0x7f980288fa58>

<folium.features.CircleMarker at 0x7f980288f5f8>

<folium.features.CircleMarker at 0x7f9802843a58>

<folium.features.CircleMarker at 0x7f98028432e8>

<folium.features.CircleMarker at 0x7f98028588d0>

<folium.features.CircleMarker at 0x7f980288dac8>

<folium.features.CircleMarker at 0x7f98028432b0>

<folium.features.CircleMarker at 0x7f9802843ba8>

<folium.features.CircleMarker at 0x7f98028587f0>

<folium.features.CircleMarker at 0x7f9802858da0>

<folium.features.CircleMarker at 0x7f9802858d30>

<folium.features.CircleMarker at 0x7f980288f978>

<folium.features.CircleMarker at 0x7f9802843278>

<folium.features.CircleMarker at 0x7f98028587b8>

<folium.features.CircleMarker at 0x7f980286c5c0>

<folium.features.CircleMarker at 0x7f980286ca90>

<folium.features.CircleMarker at 0x7f980286c1d0>

<folium.features.CircleMarker at 0x7f980286c3c8>

<folium.features.CircleMarker at 0x7f98028767b8>

<folium.features.CircleMarker at 0x7f980288f4a8>

<folium.features.CircleMarker at 0x7f9802858cf8>

<folium.features.CircleMarker at 0x7f98027fe908>

<folium.features.CircleMarker at 0x7f98027fef28>

<folium.features.CircleMarker at 0x7f98027fe358>

<folium.features.CircleMarker at 0x7f98027fe860>

<folium.features.CircleMarker at 0x7f980281b278>

<folium.features.CircleMarker at 0x7f98027fee80>

<folium.features.CircleMarker at 0x7f980286c748>

<folium.features.CircleMarker at 0x7f98028762e8>

<folium.features.CircleMarker at 0x7f9802876d30>

<folium.features.CircleMarker at 0x7f9802876c50>

<folium.features.CircleMarker at 0x7f9802876e80>

<folium.features.CircleMarker at 0x7f98027fa4e0>

<folium.features.CircleMarker at 0x7f9802858978>

<folium.features.CircleMarker at 0x7f98027fe390>

<folium.features.CircleMarker at 0x7f980281b588>

<folium.features.CircleMarker at 0x7f980281b128>

<folium.features.CircleMarker at 0x7f980281b748>

<folium.features.CircleMarker at 0x7f98027fae48>

<folium.features.CircleMarker at 0x7f98028166d8>

<folium.features.CircleMarker at 0x7f9802876940>

<folium.features.CircleMarker at 0x7f98027fa4a8>

<folium.features.CircleMarker at 0x7f980281be10>

<folium.features.CircleMarker at 0x7f9802816a58>

<folium.features.CircleMarker at 0x7f9802816828>

<folium.features.CircleMarker at 0x7f98027faeb8>

<folium.features.CircleMarker at 0x7f98027dedd8>

<folium.features.CircleMarker at 0x7f98027fada0>

<folium.features.CircleMarker at 0x7f98027fa2b0>

<folium.features.CircleMarker at 0x7f9802816588>

<folium.features.CircleMarker at 0x7f98027dec88>

<folium.features.CircleMarker at 0x7f98027de470>

<folium.features.CircleMarker at 0x7f98027de7b8>

<folium.features.CircleMarker at 0x7f98027e8908>

<folium.features.CircleMarker at 0x7f9802816940>

<folium.features.CircleMarker at 0x7f98027debe0>

<folium.features.CircleMarker at 0x7f98027e8588>

<folium.features.CircleMarker at 0x7f98027e8da0>

<folium.features.CircleMarker at 0x7f98027e85f8>

<folium.features.CircleMarker at 0x7f98027e86d8>

<folium.features.CircleMarker at 0x7f98027df860>

<folium.features.CircleMarker at 0x7f98027e8630>

<folium.features.CircleMarker at 0x7f98027dee10>

<folium.features.CircleMarker at 0x7f98027df7f0>

<folium.features.CircleMarker at 0x7f98027eb0b8>

<folium.features.CircleMarker at 0x7f98027eb748>

<folium.features.CircleMarker at 0x7f98027ebb70>

<folium.features.CircleMarker at 0x7f98027deb70>

<folium.features.CircleMarker at 0x7f98027e8828>

<folium.features.CircleMarker at 0x7f98027ebc88>

<folium.features.CircleMarker at 0x7f98027df390>

<folium.features.CircleMarker at 0x7f98027ebeb8>

<folium.features.CircleMarker at 0x7f98027eb898>

<folium.features.CircleMarker at 0x7f980277bc88>

<folium.features.CircleMarker at 0x7f980278c8d0>

<folium.features.CircleMarker at 0x7f98027df9b0>

<folium.features.CircleMarker at 0x7f98027df7b8>

<folium.features.CircleMarker at 0x7f980278ce48>

<folium.features.CircleMarker at 0x7f980278cf98>

<folium.features.CircleMarker at 0x7f980278c278>

<folium.features.CircleMarker at 0x7f980277bac8>

<folium.features.CircleMarker at 0x7f98027ebd68>

<folium.features.CircleMarker at 0x7f98027df358>

<folium.features.CircleMarker at 0x7f980277ba20>

<folium.features.CircleMarker at 0x7f980278c198>

<folium.features.CircleMarker at 0x7f980278c668>

<folium.features.CircleMarker at 0x7f98027b36d8>

<folium.features.CircleMarker at 0x7f98027b3320>

<folium.features.CircleMarker at 0x7f98027b5160>

<folium.features.CircleMarker at 0x7f980278cf28>

<folium.features.CircleMarker at 0x7f980278c550>

<folium.features.CircleMarker at 0x7f98027b3278>

<folium.features.CircleMarker at 0x7f98027b5978>

<folium.features.CircleMarker at 0x7f98027b5780>

<folium.features.CircleMarker at 0x7f98027b5e80>

<folium.features.CircleMarker at 0x7f980278c128>

<folium.features.CircleMarker at 0x7f98027b37f0>

<folium.features.CircleMarker at 0x7f98027b5208>

<folium.features.CircleMarker at 0x7f98027401d0>

<folium.features.CircleMarker at 0x7f98027409e8>

<folium.features.CircleMarker at 0x7f9802740a90>

<folium.features.CircleMarker at 0x7f9802740dd8>

<folium.features.CircleMarker at 0x7f9802749320>

<folium.features.CircleMarker at 0x7f98027b5550>

<folium.features.CircleMarker at 0x7f9802749cc0>

<folium.features.CircleMarker at 0x7f98027499e8>

<folium.features.CircleMarker at 0x7f9802749b70>

<folium.features.CircleMarker at 0x7f9802749c50>

<folium.features.CircleMarker at 0x7f9802776c88>

<folium.features.CircleMarker at 0x7f98027b55c0>

<folium.features.CircleMarker at 0x7f9802740cf8>

<folium.features.CircleMarker at 0x7f98027769e8>

<folium.features.CircleMarker at 0x7f98027492b0>

<folium.features.CircleMarker at 0x7f9802749f60>

<folium.features.CircleMarker at 0x7f9802749a90>

<folium.features.CircleMarker at 0x7f980276c470>

<folium.features.CircleMarker at 0x7f98027023c8>

<folium.features.CircleMarker at 0x7f9802776860>

<folium.features.CircleMarker at 0x7f98027767b8>

<folium.features.CircleMarker at 0x7f9802702198>

<folium.features.CircleMarker at 0x7f98027024e0>

<folium.features.CircleMarker at 0x7f980276c3c8>

<folium.features.CircleMarker at 0x7f9802702a20>

<folium.features.CircleMarker at 0x7f9802740eb8>

<folium.features.CircleMarker at 0x7f980276c908>

<folium.features.CircleMarker at 0x7f9802702cc0>

<folium.features.CircleMarker at 0x7f98027022b0>

<folium.features.CircleMarker at 0x7f9802702390>

<folium.features.CircleMarker at 0x7f980270c400>

<folium.features.CircleMarker at 0x7f980270cb00>

<folium.features.CircleMarker at 0x7f980276cbe0>

<folium.features.CircleMarker at 0x7f9802776c18>

<folium.features.CircleMarker at 0x7f980270c588>

<folium.features.CircleMarker at 0x7f980270c3c8>

<folium.features.CircleMarker at 0x7f9802734630>

<folium.features.CircleMarker at 0x7f9802734b70>

<folium.features.CircleMarker at 0x7f98026c4dd8>

<folium.features.CircleMarker at 0x7f9802702f98>

<folium.features.CircleMarker at 0x7f980270c278>

<folium.features.CircleMarker at 0x7f98026c4be0>

<folium.features.CircleMarker at 0x7f9802734550>

<folium.features.CircleMarker at 0x7f98026c4908>

<folium.features.CircleMarker at 0x7f9802734e48>

<folium.features.CircleMarker at 0x7f98026c45f8>

<folium.features.CircleMarker at 0x7f9802734780>

<folium.features.CircleMarker at 0x7f980270c208>

<folium.features.CircleMarker at 0x7f98026c4630>

<folium.features.CircleMarker at 0x7f98026e34a8>

<folium.features.CircleMarker at 0x7f98026e3be0>

<folium.features.CircleMarker at 0x7f98026e35c0>

<folium.features.CircleMarker at 0x7f98026c92e8>

<folium.features.CircleMarker at 0x7f98026c92b0>

<folium.features.CircleMarker at 0x7f98026c4748>

<folium.features.CircleMarker at 0x7f98026c9d30>

<folium.features.CircleMarker at 0x7f98026e3518>

<folium.features.CircleMarker at 0x7f98026c9978>

<folium.features.CircleMarker at 0x7f98026e46a0>

<folium.features.CircleMarker at 0x7f98026e4518>

<folium.features.CircleMarker at 0x7f98027345f8>

<folium.features.CircleMarker at 0x7f98026e3fd0>

<folium.features.CircleMarker at 0x7f98026c9f60>

<folium.features.CircleMarker at 0x7f98026e44a8>

<folium.features.CircleMarker at 0x7f98026e4c18>

<folium.features.CircleMarker at 0x7f98026abc88>

<folium.features.CircleMarker at 0x7f98026ab5c0>

<folium.features.CircleMarker at 0x7f9802684f98>

<folium.features.CircleMarker at 0x7f98026e3a58>

<folium.features.CircleMarker at 0x7f9802684518>

<folium.features.CircleMarker at 0x7f9802684ac8>

<folium.features.CircleMarker at 0x7f9802684278>

<folium.features.CircleMarker at 0x7f98026840b8>

<folium.features.CircleMarker at 0x7f98026abcc0>

<folium.features.CircleMarker at 0x7f980269eba8>

<folium.features.CircleMarker at 0x7f98026abdd8>

<folium.features.CircleMarker at 0x7f98026ab7b8>

<folium.features.CircleMarker at 0x7f9802684b38>

<folium.features.CircleMarker at 0x7f980269e240>

<folium.features.CircleMarker at 0x7f980269e048>

<folium.features.CircleMarker at 0x7f980269ec88>

<folium.features.CircleMarker at 0x7f98026866d8>

<folium.features.CircleMarker at 0x7f980269e588>

<folium.features.CircleMarker at 0x7f9802684438>

<folium.features.CircleMarker at 0x7f9802686400>

<folium.features.CircleMarker at 0x7f9802686908>

<folium.features.CircleMarker at 0x7f9802686470>

<folium.features.CircleMarker at 0x7f98026867b8>

<folium.features.CircleMarker at 0x7f98026674a8>

<folium.features.CircleMarker at 0x7f980263d0b8>

<folium.features.CircleMarker at 0x7f980269edd8>

<folium.features.CircleMarker at 0x7f980263dfd0>

<folium.features.CircleMarker at 0x7f98026864e0>

<folium.features.CircleMarker at 0x7f980263db38>

<folium.features.CircleMarker at 0x7f9802667f28>

In [33]:
map_melb

# Fetching the list of Indian Restaurants in Melbourne CBD and render them on Map!!

Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

In [34]:
CLIENT_ID = 'J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ' # your Foursquare ID
CLIENT_SECRET = '2UP1CVJ5N0FC5KGF5FKLY4SG5CTMF3HQWTFJS20FEL5R4WGN' # your Foursquare Secret
VERSION = '20180604'# Foursquare API version, Then try todays date

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ
CLIENT_SECRET:2UP1CVJ5N0FC5KGF5FKLY4SG5CTMF3HQWTFJS20FEL5R4WGN


#### Let's explore Melbourne CBD first, I believe there must be many Indian Restaurants in CBD!!

In [35]:
df_Mel_Subs[df_Mel_Subs['suburb']=='Melbourne']

Unnamed: 0,suburb,latitude,longitude
378,Melbourne,-37.81,144.97


In [36]:
search_query = 'Indian'
radius = 1000
LIMIT = 50
print(search_query + ' .... OK!')

Indian .... OK!


In [37]:
address = 'Melbourne, VIC'
#-37.5939889 145.0337333
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

-37.8142176 144.9631608


In [38]:
# latitude=-37.81
# longitude=144.97

In [39]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    latitude, 
    longitude, 
    VERSION, 
    search_query, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ&client_secret=2UP1CVJ5N0FC5KGF5FKLY4SG5CTMF3HQWTFJS20FEL5R4WGN&ll=-37.8142176,144.9631608&v=20180604&query=Indian&radius=1000&limit=50'

In [40]:
results = requests.get(url).json()
#results

In [41]:
#results

In [42]:
venue_data=results['response']['venues']
#venue_data

In [43]:
venue_details=[]
for row in venue_data:
    try:
        venue_id=row['id']
        venue_name=row['name']
        venue_lat=row['location']['lat']
        venue_lng=row['location']['lng']
        venue_category=row['categories'][0]['name']
        venue_details.append([venue_id,venue_name,venue_lat,venue_lng,venue_category])
    except KeyError:
        pass
        
column_names=['ID','Name','Lat','Lng','Category']
venues = pd.DataFrame(venue_details,columns=column_names) 
venues

Unnamed: 0,ID,Name,Lat,Lng,Category
0,4b768359f964a520b84f2ee3,Shiraaz Fine Indian Cuisine,-37.818692,144.95994,Indian Restaurant
1,5aa4d624b1538e4b65bf900d,Walkers Stop Convenience Indian Grocery,-37.814998,144.954822,Grocery Store
2,4ba44773f964a520799238e3,Drums Indian Cafe,-37.806458,144.958643,Indian Restaurant
3,4b05874df964a5205e8a22e3,Gaylord Indian Restaurant,-37.820263,144.954499,Indian Restaurant
4,4b05874ef964a520b28a22e3,Nirankar Indian Restaurant,-37.814383,144.960548,Indian Restaurant
5,5243b8f22fc65bb2d81f5182,Indian Mirror,-37.815488,144.96652,Bookstore
6,4e15208952b1b9e5643e0fd0,Indian Passport and Visa Services Centre,-37.816272,144.967,Embassy / Consulate
7,5b403404c9a5170039c289b4,Tejas Modern Indian,-37.815271,144.961671,Indian Restaurant
8,544dc387498ea3736c48cab7,indian visa embassy,-37.815986,144.967305,Embassy / Consulate
9,4dbe28db0437955ec05ee699,Indian Embassy @ Melbourne,-37.816382,144.967016,Embassy / Consulate


In [44]:
indian_resturants_melb=venues[venues['Category']=='Indian Restaurant'] 
#indian_resturants_melb

In [45]:
print("No of Indian Restaurants in Melbourne CBD is",str(len(indian_resturants_melb)))

No of Indian Restaurants in Melbourne CBD is 11


In [46]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13) # generate map centred around the Conrad Hotel

# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Melbourne',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Indian restaurants as blue circle markers
for lat, lng, label in zip(indian_resturants_melb.Lat, indian_resturants_melb.Lng, indian_resturants_melb.Name):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

<folium.features.CircleMarker at 0x7f9802368390>

<folium.features.CircleMarker at 0x7f9802368dd8>

<folium.features.CircleMarker at 0x7f9802368d68>

<folium.features.CircleMarker at 0x7f9802368b70>

<folium.features.CircleMarker at 0x7f980236fcf8>

<folium.features.CircleMarker at 0x7f980236f5c0>

<folium.features.CircleMarker at 0x7f98023680b8>

<folium.features.CircleMarker at 0x7f980236f2b0>

<folium.features.CircleMarker at 0x7f98022f8400>

<folium.features.CircleMarker at 0x7f98022f89b0>

<folium.features.CircleMarker at 0x7f98022f8080>

<folium.features.CircleMarker at 0x7f980236f4a8>

In [47]:
# display map
venues_map

# Search & Map all the Indian Restaurants in and around Melbourne suburbs

#### Let's create a function to repeat the same process as Melbourne, CBD to all the suburbs in Melbourne

In [48]:
def get_venues(lat,lng):
    
    #set variables
    radius=1000
    LIMIT=200
    search_query = 'Indian'
    CLIENT_ID = 'J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ' # your Foursquare ID
    CLIENT_SECRET = '2UP1CVJ5N0FC5KGF5FKLY4SG5CTMF3HQWTFJS20FEL5R4WGN' # your Foursquare Secret
    VERSION = '20200401' # Foursquare API version

    #url to fetch data from foursquare api
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET,
            lat, 
            lng, 
            VERSION, 
            search_query, 
            radius, 
            LIMIT)
    
    # get all the data
    results = requests.get(url).json()
    #print('##################')
    #print(results)
    venue_data=results['response']['venues']
    venue_details=[]
    for row in venue_data:
        try:
            venue_id=row['id']
            venue_name=row['name']
            venue_lat=row['location']['lat']
            venue_lng=row['location']['lng']
            venue_category=row['categories'][0]['name']
            venue_details.append([venue_id,venue_name,venue_lat,venue_lng,venue_category])
        except KeyError:
            pass
        
    column_names=['ID','Name','Lat','Lng','Category']
    df = pd.DataFrame(venue_details,columns=column_names)
    return df

In [49]:
# prepare neighborhood list that contains indian resturants
column_names=['Suburb', 'ID','Name','Lat','Lng']
indian_rest=pd.DataFrame(columns=column_names)
count=1
for row in df_Mel_Subs.values.tolist():
    try:
        Suburb, Latitude, Longitude=row
        venues = get_venues(Latitude,Longitude)
        #print(venues)
    except:
        print("Oops!", sys.exc_info()[0], "occurred while trying ",Suburb)
        print("Trying Again!!")
        
    indian_resturants=venues[venues['Category']=='Indian Restaurant']
    print('(',count,'/',len(df_Mel_Subs),')','Indian Restaurant in '+Suburb+', '+'VIC'+':'+str(len(indian_resturants)))
    for resturant_detail in indian_resturants.values.tolist():
        id, name , lat, lng, category=resturant_detail
        indian_rest = indian_rest.append({'Suburb': Suburb,
                                                'ID': id,
                                                'Name' : name,
                                                'Lat' : lat,
                                                'Lng' : lng
                                               }, ignore_index=True)
    count+=1

( 1 / 379 ) Indian Restaurant in Abbotsford, VIC:1
( 2 / 379 ) Indian Restaurant in Aberfeldie, VIC:0
( 3 / 379 ) Indian Restaurant in Airport West, VIC:0
( 4 / 379 ) Indian Restaurant in Albanvale, VIC:0
( 5 / 379 ) Indian Restaurant in Albert Park, VIC:5
( 6 / 379 ) Indian Restaurant in Albion, VIC:1
( 7 / 379 ) Indian Restaurant in Alphington, VIC:1
( 8 / 379 ) Indian Restaurant in Altona Meadows, VIC:0
( 9 / 379 ) Indian Restaurant in Altona North, VIC:0
( 10 / 379 ) Indian Restaurant in Altona, VIC:0
( 11 / 379 ) Indian Restaurant in Ardeer, VIC:0
( 12 / 379 ) Indian Restaurant in Armadale, VIC:0
( 13 / 379 ) Indian Restaurant in Ascot Vale, VIC:1
( 14 / 379 ) Indian Restaurant in Ashburton, VIC:1
( 15 / 379 ) Indian Restaurant in Ashwood, VIC:1
( 16 / 379 ) Indian Restaurant in Aspendale Gardens, VIC:0
( 17 / 379 ) Indian Restaurant in Aspendale, VIC:0
( 18 / 379 ) Indian Restaurant in Attwood, VIC:0
( 19 / 379 ) Indian Restaurant in Auburn, VIC:0
( 20 / 379 ) Indian Restaurant i

In [50]:
indian_rest

Unnamed: 0,Suburb,ID,Name,Lat,Lng
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,-37.810279,144.998983
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,-37.833277,144.960784
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,-37.839276,144.967427
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,-37.831126,144.954201
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,-37.831110,144.953620
...,...,...,...,...,...
230,Windsor,5811d139d67ce0d723652355,spicy fusion Indian bar and restaurant,-37.846501,144.993526
231,Windsor,5299966911d24998f6446a66,INDIAN TAKE AWAY OR EAT IN,-37.839181,144.992992
232,Windsor,4b8cb19af964a520afda32e3,Flag Of India Indian Restaurant,-37.847543,144.999303
233,Windsor,4b9994e6f964a5204b8535e3,Yarra Indian Take Away,-37.839297,144.993386


### Business Problem

Fetch the list of unique list of suburbs from the above dataframe to get the list of suburbs with minimum one existing Indian Restaurant. We can minus this list from the list of suburbs in Melbourne and get the list of suburbs which lack Indian Restaurants

In [51]:
df_A = indian_rest[['Suburb']].drop_duplicates(['Suburb'],keep='first', ignore_index=True)
df_A.shape

(122, 1)

In [52]:
df_B = df_Mel_Subs[['suburb']].drop_duplicates(['suburb'],keep='first', ignore_index=True)
df_B.shape

(371, 1)

We can remove suburbs with Indian Restaurants(df_A) from list of suburbs in Melbourne(df_B) and get the list of suburbs which do not have an Indian Restaurant

In [53]:
# df1[df1.ID.isin(df2.ID) == False]
df_C = df_B[df_B['suburb'].isin(df_A['Suburb']) == False].reset_index(drop=True)
df_C.shape

(249, 1)

#### i.e there are 312 suburbs without an Indian Restaurants. We can get the list from below....

In [54]:
df_C['suburb'].tolist()

['Aberfeldie',
 'Airport West',
 'Albanvale',
 'Altona Meadows',
 'Altona North',
 'Altona',
 'Ardeer',
 'Armadale',
 'Aspendale Gardens',
 'Aspendale',
 'Attwood',
 'Auburn',
 'Keilor',
 'Avondale Heights',
 'Balaclava',
 'Balwyn North',
 'Kensington',
 'Kew East',
 'Keysborough',
 'Kilsyth South',
 'Bayswater North',
 'Bayswater',
 'Belgrave Heights',
 'Belgrave South',
 'Belgrave',
 'Kilsyth',
 'Kings Park',
 'Berwick',
 'Kingsville',
 'Knoxfield',
 'Kooyong',
 'Lalor',
 'Langwarrin',
 'Bittern',
 'Laverton North',
 'Laverton',
 'Lilydale',
 'Lower Plenty',
 'Blairgowrie',
 'Bonbeach',
 'Boronia',
 'Lynbrook',
 'Lyndhurst',
 'Box Hill North',
 'Box Hill South',
 'Box Hill',
 'Braeside',
 'Braybrook',
 'Briar Hill',
 'Lysterfield',
 'Macleod',
 'Maidstone',
 'Broadmeadows',
 'Malvern',
 'Meadow Heights',
 'Melbourne Airport',
 'Brookfield',
 'Brooklyn',
 'Brunswick East',
 'Brunswick',
 'Melton South',
 'Mernda',
 'Bulleen',
 'Mill Park',
 'Monbulk',
 'Mont Albert North',
 'Burnside 

## Business Problem # 3
### Q. Which areas lack Indian Restaurants?
#### Hence we got the solution to business problem # 3 above

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

#### Lets checkout the number of Indian Restaurants we were able to fetch using the FourSquares. There will be many duplicates among them, as we used 1000m distance. So lets check for duplicates and keep the first occuring restaurant in the list and delete rest.

In [55]:
indian_rest.shape

(235, 5)

In [56]:
duplicateRowsDF = indian_rest[indian_rest.duplicated(['ID','Name','Lat','Lng'])]
duplicateRowsDF.shape

(132, 5)

In [57]:
indian_rest.drop_duplicates(['ID','Name','Lat','Lng'],keep='first', inplace=True, ignore_index=True)

In [58]:
indian_rest.shape

(103, 5)

#### Fetaching the geograpgical localtion of Melbourne to map

In [59]:
address = 'Melbourne, VIC'
#-37.5939889 145.0337333
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

-37.8142176 144.9631608


#### Now we can plot all the Indian Restaurants in Melbourne Suburbs on Maps

In [60]:
indian_restaurant_map = folium.Map(location=[latitude, longitude], zoom_start=10) # generate map centred around the Conrad Hotel

# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Melbourne',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(indian_restaurant_map)

<folium.features.CircleMarker at 0x7f98022e4a20>

In [61]:
# add the Indian restaurants as blue circle markers
for lat, lng, label, suburb in zip(indian_rest.Lat, indian_rest.Lng, indian_rest.Name, indian_rest.Suburb):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        #popup=label,
        popup=folium.Popup(label+','+suburb, parse_html=True, max_width=100),
        fill = True,
        fill_color='blue',
        fill_opacity=0.6,
        parse_html=True
    ).add_to(indian_restaurant_map)

<folium.features.CircleMarker at 0x7f9802298eb8>

<folium.features.CircleMarker at 0x7f98022ec828>

<folium.features.CircleMarker at 0x7f9802298780>

<folium.features.CircleMarker at 0x7f9802298f98>

<folium.features.CircleMarker at 0x7f9802298630>

<folium.features.CircleMarker at 0x7f98022aa390>

<folium.features.CircleMarker at 0x7f9802294198>

<folium.features.CircleMarker at 0x7f9802294c88>

<folium.features.CircleMarker at 0x7f98022987f0>

<folium.features.CircleMarker at 0x7f9802294748>

<folium.features.CircleMarker at 0x7f98022942b0>

<folium.features.CircleMarker at 0x7f9802294208>

<folium.features.CircleMarker at 0x7f98022aae10>

<folium.features.CircleMarker at 0x7f98022aad68>

<folium.features.CircleMarker at 0x7f98022aa048>

<folium.features.CircleMarker at 0x7f98022a6b70>

<folium.features.CircleMarker at 0x7f98022ecc50>

<folium.features.CircleMarker at 0x7f9802294e48>

<folium.features.CircleMarker at 0x7f98022a6f98>

<folium.features.CircleMarker at 0x7f98022a60f0>

<folium.features.CircleMarker at 0x7f98022a6358>

<folium.features.CircleMarker at 0x7f9802249048>

<folium.features.CircleMarker at 0x7f98022a6198>

<folium.features.CircleMarker at 0x7f9802298940>

<folium.features.CircleMarker at 0x7f98022aa278>

<folium.features.CircleMarker at 0x7f9802249940>

<folium.features.CircleMarker at 0x7f98022a6438>

<folium.features.CircleMarker at 0x7f98022499b0>

<folium.features.CircleMarker at 0x7f9802249588>

<folium.features.CircleMarker at 0x7f980225c780>

<folium.features.CircleMarker at 0x7f980225cf28>

<folium.features.CircleMarker at 0x7f98022a62b0>

<folium.features.CircleMarker at 0x7f98022494e0>

<folium.features.CircleMarker at 0x7f980225ce10>

<folium.features.CircleMarker at 0x7f980225c5f8>

<folium.features.CircleMarker at 0x7f980225cb00>

<folium.features.CircleMarker at 0x7f980223f0f0>

<folium.features.CircleMarker at 0x7f980223f588>

<folium.features.CircleMarker at 0x7f980223f2e8>

<folium.features.CircleMarker at 0x7f980225cef0>

<folium.features.CircleMarker at 0x7f98022aa080>

<folium.features.CircleMarker at 0x7f98022e4400>

<folium.features.CircleMarker at 0x7f980223f9e8>

<folium.features.CircleMarker at 0x7f980223f978>

<folium.features.CircleMarker at 0x7f980225a390>

<folium.features.CircleMarker at 0x7f980225a710>

<folium.features.CircleMarker at 0x7f980225a198>

<folium.features.CircleMarker at 0x7f980223f470>

<folium.features.CircleMarker at 0x7f980225ada0>

<folium.features.CircleMarker at 0x7f980225c668>

<folium.features.CircleMarker at 0x7f980223f630>

<folium.features.CircleMarker at 0x7f98022c0470>

<folium.features.CircleMarker at 0x7f9802204940>

<folium.features.CircleMarker at 0x7f980225ad30>

<folium.features.CircleMarker at 0x7f9802204320>

<folium.features.CircleMarker at 0x7f98022143c8>

<folium.features.CircleMarker at 0x7f98022e4b70>

<folium.features.CircleMarker at 0x7f98022149b0>

<folium.features.CircleMarker at 0x7f9802204898>

<folium.features.CircleMarker at 0x7f98022044e0>

<folium.features.CircleMarker at 0x7f9802214c88>

<folium.features.CircleMarker at 0x7f9802214a20>

<folium.features.CircleMarker at 0x7f9802214780>

<folium.features.CircleMarker at 0x7f9802204470>

<folium.features.CircleMarker at 0x7f9802214550>

<folium.features.CircleMarker at 0x7f9802234c18>

<folium.features.CircleMarker at 0x7f9802234470>

<folium.features.CircleMarker at 0x7f9802234668>

<folium.features.CircleMarker at 0x7f9802234860>

<folium.features.CircleMarker at 0x7f98022349e8>

<folium.features.CircleMarker at 0x7f9802ea4c88>

<folium.features.CircleMarker at 0x7f98022349b0>

<folium.features.CircleMarker at 0x7f9802ea46a0>

<folium.features.CircleMarker at 0x7f9802ea4630>

<folium.features.CircleMarker at 0x7f980220ae80>

<folium.features.CircleMarker at 0x7f9802ea4048>

<folium.features.CircleMarker at 0x7f980220aa90>

<folium.features.CircleMarker at 0x7f980220add8>

<folium.features.CircleMarker at 0x7f980220a978>

<folium.features.CircleMarker at 0x7f980220a588>

<folium.features.CircleMarker at 0x7f980220a898>

<folium.features.CircleMarker at 0x7f980221e208>

<folium.features.CircleMarker at 0x7f980221e860>

<folium.features.CircleMarker at 0x7f980221eda0>

<folium.features.CircleMarker at 0x7f98021e5240>

<folium.features.CircleMarker at 0x7f98021e5208>

<folium.features.CircleMarker at 0x7f980221e748>

<folium.features.CircleMarker at 0x7f98021e5668>

<folium.features.CircleMarker at 0x7f980220ab70>

<folium.features.CircleMarker at 0x7f980221eb38>

<folium.features.CircleMarker at 0x7f98021e5908>

<folium.features.CircleMarker at 0x7f980221e8d0>

<folium.features.CircleMarker at 0x7f98021e8908>

<folium.features.CircleMarker at 0x7f9802ea4668>

<folium.features.CircleMarker at 0x7f980220a048>

<folium.features.CircleMarker at 0x7f98021e8080>

<folium.features.CircleMarker at 0x7f98021e8dd8>

<folium.features.CircleMarker at 0x7f98021e8f60>

<folium.features.CircleMarker at 0x7f98021ed9e8>

<folium.features.CircleMarker at 0x7f98021e5d30>

<folium.features.CircleMarker at 0x7f98021e8a20>

<folium.features.CircleMarker at 0x7f98021ed940>

<folium.features.CircleMarker at 0x7f98021ed898>

In [62]:
indian_restaurant_map

# Getting Ratings / Likes for each restaurant

#### Single Restaurant

In [63]:
id = '5a593d245c683829c6e59cbd'

url = 'https://api.foursquare.com/v2/venues/{}/likes?client_id={}&client_secret={}&v={}'.format(
        id,
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION)

print(url) 

results = requests.get(url).json() 
likes_count = results['response']['likes']['count']
likes_count

https://api.foursquare.com/v2/venues/5a593d245c683829c6e59cbd/likes?client_id=J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ&client_secret=2UP1CVJ5N0FC5KGF5FKLY4SG5CTMF3HQWTFJS20FEL5R4WGN&v=20180604


0

#### Fetch the likes for all the indian restaurants in Melbourne

In [64]:
indian_rest.head()

Unnamed: 0,Suburb,ID,Name,Lat,Lng
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,-37.810279,144.998983
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,-37.833277,144.960784
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,-37.839276,144.967427
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,-37.831126,144.954201
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,-37.83111,144.95362


In [65]:
#function to fetch
def get_venue_like(id):
    
    #set variables
    CLIENT_ID = 'J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ' # your Foursquare ID
    CLIENT_SECRET = '2UP1CVJ5N0FC5KGF5FKLY4SG5CTMF3HQWTFJS20FEL5R4WGN' # your Foursquare Secret
    VERSION = '20200401' # Foursquare API version

    #url to fetch data from foursquare api
    url = 'https://api.foursquare.com/v2/venues/{}/likes?client_id={}&client_secret={}&v={}'.format(id,
                                                                                                    CLIENT_ID,
                                                                                                    CLIENT_SECRET,
                                                                                                    VERSION)
    
    # get all the data
    results = requests.get(url).json()
    #print('##################')
    #print(results)
    likes_count = results['response']['likes']['count']
    
    return likes_count

In [66]:
# prepare neighborhood list that contains indian resturants and likes
column_names=['Suburb', 'ID','Name','Likes','Lat','Lng']
indian_rest_mel=pd.DataFrame(columns=column_names)
for row in indian_rest.values.tolist():
    try:
        Suburb, ID, Name, Latitude, Longitude=row
        likes = get_venue_like(ID)
    except:
        print("Oops!", sys.exc_info()[0], "occurred while trying for ",Name, Suburb)
        print("Trying Again!!")
    indian_rest_mel = indian_rest_mel.append({'Suburb': Suburb,
                                              'ID': ID,
                                              'Name' : Name,
                                              'Likes' : likes,
                                              'Lat' : Latitude,
                                              'Lng' : Longitude
                                              }, ignore_index=True)

In [67]:
indian_rest_mel

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,0,-37.810279,144.998983
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,0,-37.833277,144.960784
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,0,-37.839276,144.967427
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,0,-37.831126,144.954201
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,1,-37.831110,144.953620
...,...,...,...,...,...,...
98,Upwey,4e0ee71f18a8bf9784d1c2f1,Jai Ho Indian Takeaway,0,-37.902920,145.329860
99,Vermont South,4b73bc89f964a5201eba2de3,Handis Indian Restaurant,2,-37.836632,145.195989
100,Wantirna South,4bf5f3b2d4cdb713ea3f84fe,Khazana Indian Restaurant,2,-37.869397,145.244062
101,Williamstown North,51d7dc23498e1aa1c2fe510b,Raga Indian Cuisine,1,-37.861763,144.902595


# Clustering the Indian Restaurants based on the user likes and rendering them on Map based on the cluster

#### Let's check how many venues were returned for each suburb

In [68]:
df_bp2 = indian_rest_mel
df_bp2

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,0,-37.810279,144.998983
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,0,-37.833277,144.960784
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,0,-37.839276,144.967427
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,0,-37.831126,144.954201
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,1,-37.831110,144.953620
...,...,...,...,...,...,...
98,Upwey,4e0ee71f18a8bf9784d1c2f1,Jai Ho Indian Takeaway,0,-37.902920,145.329860
99,Vermont South,4b73bc89f964a5201eba2de3,Handis Indian Restaurant,2,-37.836632,145.195989
100,Wantirna South,4bf5f3b2d4cdb713ea3f84fe,Khazana Indian Restaurant,2,-37.869397,145.244062
101,Williamstown North,51d7dc23498e1aa1c2fe510b,Raga Indian Cuisine,1,-37.861763,144.902595


Perform groupby operation on the "Suburb" field and order by the number of Indian Restaurants. Then get the top 5 suburbs with most Indian Restaurants

In [69]:
df_bp2.groupby('Suburb')['ID'].count().sort_values(ascending=False).head(5)

Suburb
Docklands         10
Prahran            9
Albert Park        5
East Melbourne     5
South Wharf        5
Name: ID, dtype: int64

These are the suburbs with most number of Indian Restaurants. Therefore we can conclude that these suburbs have the most number of customers for Indian Food. Hence these are the best suburbs to start a new Indian Restaurant

## Business Problem # 2
### Q. Find the top 3 suburbs for starting an Indian Restaurant?
#### Hence we got the solution to business problem # 2 above

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

From the above information, we can even deduce that with 10 Indian Restaurants in the reach, Docklands is the suburb to stay in, if one want to be in reach of multiple Indian Restaurants.

## Business Problem # 4
### Q. What is the best place to stay in Melbourne, if you want to stay close to Indian Food?Find the top 3 suburbs for starting an Indian Restaurant?
#### Hence we got the solution to business problem # 4 above

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

From the above information, we can get that with almost 10 restaurants in the reach respectively Docklands and Prahran are the go to areas if one wishes to have good Indian Food.

## Business Problem # 5
### Q. What is the best location to go if you wish to eat good Indian Food?
#### Hence we got the solution to business problem # 5 above

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

## Pre-Processing

As you can see, all other columns other than "Likes" in this dataset is a categorical variable. k-means algorithm isn't directly applicable to categorical variables because Euclidean distance function isn't really meaningful for discrete variables. So, lets drop these columns and run clustering.

In [70]:
df = indian_rest_mel[['Likes']]
df.head()

Unnamed: 0,Likes
0,0
1,0
2,0
3,0
4,1


## Normalizing over the standard deviation

Now let's normalize the dataset. But why do we need normalization in the first place? Normalization is a statistical method that helps mathematical-based algorithms to interpret features with different magnitudes and distributions equally. We use StandardScaler() to normalize our dataset.

In [71]:
from sklearn.preprocessing import StandardScaler
X = df.values
X = np.nan_to_num(X)
Clus_dataSet = StandardScaler().fit_transform(X)
Clus_dataSet



array([[-0.30575417],
       [-0.30575417],
       [-0.30575417],
       [-0.30575417],
       [-0.14342077],
       [ 0.18124603],
       [-0.30575417],
       [ 0.50591284],
       [ 0.01891263],
       [-0.30575417],
       [ 0.01891263],
       [-0.14342077],
       [-0.30575417],
       [ 0.01891263],
       [-0.30575417],
       [-0.30575417],
       [-0.14342077],
       [-0.30575417],
       [ 0.01891263],
       [-0.30575417],
       [ 0.01891263],
       [-0.30575417],
       [-0.14342077],
       [ 0.01891263],
       [-0.30575417],
       [-0.14342077],
       [-0.14342077],
       [ 0.01891263],
       [-0.30575417],
       [ 0.99291304],
       [-0.30575417],
       [-0.14342077],
       [-0.30575417],
       [-0.30575417],
       [-0.14342077],
       [-0.30575417],
       [-0.30575417],
       [-0.14342077],
       [-0.14342077],
       [-0.30575417],
       [ 0.01891263],
       [-0.30575417],
       [-0.30575417],
       [-0.30575417],
       [ 0.50591284],
       [-0

<h2 id="modeling">Modeling</h2>

Using the K-means clustering we can do modeling much easier, we need not guess it manually which cluster each restaurant falls into.

Lets apply k-means on our dataset, and take look at cluster labels.

In [72]:
clusterNum = 4
k_means = KMeans(init = "k-means++", n_clusters = clusterNum, n_init = 12)
k_means.fit(X)
labels = k_means.labels_
print(labels)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=4, n_init=12, n_jobs=None, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

[0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 3 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 0 0 3 0 0 0 0 0
 0 0 0 0 0 0 0 0 3 3 0 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0]


<h2 id="insights">Insights</h2>
We assign the labels to each row in dataframe.

In [73]:
indian_rest_mel["Clus_km"] = labels
indian_rest_mel.head(5)

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng,Clus_km
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,0,-37.810279,144.998983,0
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,0,-37.833277,144.960784,0
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,0,-37.839276,144.967427,0
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,0,-37.831126,144.954201,0
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,1,-37.83111,144.95362,0


#### Fetching Melbourne geo location for mapping

In [74]:
address = 'Melbourne, VIC'
#-37.5939889 145.0337333
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

-37.8142176 144.9631608


In [75]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(clusterNum)
ys = [i + x + (i*x)**2 for i in range(clusterNum)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, suburb, name, cluster in zip(indian_rest_mel['Lat'], indian_rest_mel['Lng'], indian_rest_mel['Suburb'], 
                                           indian_rest_mel['Name'], indian_rest_mel['Clus_km']):
    label = folium.Popup(str(name) + str(suburb) +' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<folium.features.CircleMarker at 0x7f98020e90f0>

<folium.features.CircleMarker at 0x7f980207bac8>

<folium.features.CircleMarker at 0x7f980207b518>

<folium.features.CircleMarker at 0x7f980207b8d0>

<folium.features.CircleMarker at 0x7f980207b4e0>

<folium.features.CircleMarker at 0x7f980207bf60>

<folium.features.CircleMarker at 0x7f98020b2080>

<folium.features.CircleMarker at 0x7f98020b2c50>

<folium.features.CircleMarker at 0x7f98020de2b0>

<folium.features.CircleMarker at 0x7f98020e9780>

<folium.features.CircleMarker at 0x7f980207b240>

<folium.features.CircleMarker at 0x7f98020e97f0>

<folium.features.CircleMarker at 0x7f98020e96a0>

<folium.features.CircleMarker at 0x7f98020b2390>

<folium.features.CircleMarker at 0x7f98020b2fd0>

<folium.features.CircleMarker at 0x7f98020b2a20>

<folium.features.CircleMarker at 0x7f98020b2b00>

<folium.features.CircleMarker at 0x7f9802072c88>

<folium.features.CircleMarker at 0x7f980207b048>

<folium.features.CircleMarker at 0x7f98020721d0>

<folium.features.CircleMarker at 0x7f98020729e8>

<folium.features.CircleMarker at 0x7f98020722e8>

<folium.features.CircleMarker at 0x7f9802072eb8>

<folium.features.CircleMarker at 0x7f980206ca20>

<folium.features.CircleMarker at 0x7f98020dea90>

<folium.features.CircleMarker at 0x7f980206c668>

<folium.features.CircleMarker at 0x7f980206c198>

<folium.features.CircleMarker at 0x7f980206c208>

<folium.features.CircleMarker at 0x7f980206ca90>

<folium.features.CircleMarker at 0x7f980206cf60>

<folium.features.CircleMarker at 0x7f97fffe2b00>

<folium.features.CircleMarker at 0x7f9802072f28>

<folium.features.CircleMarker at 0x7f980206c8d0>

<folium.features.CircleMarker at 0x7f97fffe2e10>

<folium.features.CircleMarker at 0x7f97fffe24e0>

<folium.features.CircleMarker at 0x7f97fffe2240>

<folium.features.CircleMarker at 0x7f97fffe2438>

<folium.features.CircleMarker at 0x7f97fffdcac8>

<folium.features.CircleMarker at 0x7f980206cef0>

<folium.features.CircleMarker at 0x7f97fffe2b38>

<folium.features.CircleMarker at 0x7f97fffdc4e0>

<folium.features.CircleMarker at 0x7f97fffdcb00>

<folium.features.CircleMarker at 0x7f97fffdcb38>

<folium.features.CircleMarker at 0x7f97fffdc860>

<folium.features.CircleMarker at 0x7f97ffff0b38>

<folium.features.CircleMarker at 0x7f97fffe2f28>

<folium.features.CircleMarker at 0x7f97fffdcba8>

<folium.features.CircleMarker at 0x7f97ffff0400>

<folium.features.CircleMarker at 0x7f97ffff04a8>

<folium.features.CircleMarker at 0x7f97ffff0940>

<folium.features.CircleMarker at 0x7f97ffff07f0>

<folium.features.CircleMarker at 0x7f97ffff09b0>

<folium.features.CircleMarker at 0x7f97fffdc240>

<folium.features.CircleMarker at 0x7f97fffdca20>

<folium.features.CircleMarker at 0x7f98000164a8>

<folium.features.CircleMarker at 0x7f9800016518>

<folium.features.CircleMarker at 0x7f9800016d30>

<folium.features.CircleMarker at 0x7f9800016908>

<folium.features.CircleMarker at 0x7f9800016a20>

<folium.features.CircleMarker at 0x7f97ffff0278>

<folium.features.CircleMarker at 0x7f97ffff0cc0>

<folium.features.CircleMarker at 0x7f97fffc0518>

<folium.features.CircleMarker at 0x7f97fffc0588>

<folium.features.CircleMarker at 0x7f97fffc09e8>

<folium.features.CircleMarker at 0x7f97fffc0860>

<folium.features.CircleMarker at 0x7f97fffc0668>

<folium.features.CircleMarker at 0x7f98000167f0>

<folium.features.CircleMarker at 0x7f97fffc06a0>

<folium.features.CircleMarker at 0x7f97fffbfe48>

<folium.features.CircleMarker at 0x7f97fffbf278>

<folium.features.CircleMarker at 0x7f97fffbf198>

<folium.features.CircleMarker at 0x7f97fffbfb38>

<folium.features.CircleMarker at 0x7f97fffbf8d0>

<folium.features.CircleMarker at 0x7f97fffc0cf8>

<folium.features.CircleMarker at 0x7f97fffbfb70>

<folium.features.CircleMarker at 0x7f97fffbffd0>

<folium.features.CircleMarker at 0x7f97fff64fd0>

<folium.features.CircleMarker at 0x7f97fff64278>

<folium.features.CircleMarker at 0x7f97fff646d8>

<folium.features.CircleMarker at 0x7f97fff641d0>

<folium.features.CircleMarker at 0x7f97fff64b00>

<folium.features.CircleMarker at 0x7f97fff64940>

<folium.features.CircleMarker at 0x7f97fff5f898>

<folium.features.CircleMarker at 0x7f97fff5f550>

<folium.features.CircleMarker at 0x7f97fff5ff28>

<folium.features.CircleMarker at 0x7f97fff5fef0>

<folium.features.CircleMarker at 0x7f97fff5fcc0>

<folium.features.CircleMarker at 0x7f97fffc0e48>

<folium.features.CircleMarker at 0x7f97fff5fac8>

<folium.features.CircleMarker at 0x7f97fff79ef0>

<folium.features.CircleMarker at 0x7f97fff79518>

<folium.features.CircleMarker at 0x7f97fff796d8>

<folium.features.CircleMarker at 0x7f97fff79390>

<folium.features.CircleMarker at 0x7f97fff79668>

<folium.features.CircleMarker at 0x7f97fff5f080>

<folium.features.CircleMarker at 0x7f97fff5f4a8>

<folium.features.CircleMarker at 0x7f97fff7f550>

<folium.features.CircleMarker at 0x7f97fff7fc18>

<folium.features.CircleMarker at 0x7f97fff7fc50>

<folium.features.CircleMarker at 0x7f97fff7ff98>

<folium.features.CircleMarker at 0x7f97fff7fa90>

<folium.features.CircleMarker at 0x7f97fff79550>

<folium.features.CircleMarker at 0x7f97fff79f98>

## Business Problem # 1
### Q. List and visualize all the major parts of Melbourne City which have popular Indian Restaurants
#### The above rendered map is the solution to business problem # 1

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

# Examine Clusters

Indian Restaurants are clustered based on the user rating into 3 different clusters.

Cluster 1 --> Low or No Rating

Cluster 2 --> With Highest Rating

Cluster 3 --> Medium Rating

#### Custer 1

In [76]:
indian_rest_mel.loc[indian_rest_mel['Clus_km'] == 0, indian_rest_mel.columns[[0] + list(range(1, indian_rest_mel.shape[1]))]]

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng,Clus_km
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,0,-37.810279,144.998983,0
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,0,-37.833277,144.960784,0
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,0,-37.839276,144.967427,0
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,0,-37.831126,144.954201,0
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,1,-37.831110,144.953620,0
...,...,...,...,...,...,...,...
98,Upwey,4e0ee71f18a8bf9784d1c2f1,Jai Ho Indian Takeaway,0,-37.902920,145.329860,0
99,Vermont South,4b73bc89f964a5201eba2de3,Handis Indian Restaurant,2,-37.836632,145.195989,0
100,Wantirna South,4bf5f3b2d4cdb713ea3f84fe,Khazana Indian Restaurant,2,-37.869397,145.244062,0
101,Williamstown North,51d7dc23498e1aa1c2fe510b,Raga Indian Cuisine,1,-37.861763,144.902595,0


#### Custer 2

In [77]:
indian_rest_mel.loc[indian_rest_mel['Clus_km'] == 1, indian_rest_mel.columns[[0] + list(range(1, indian_rest_mel.shape[1]))]]

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng,Clus_km
91,South Wharf,4b1cbc8ff964a520570924e3,Red Pepper,58,-37.81146,144.972623,1


#### Custer 3

In [78]:
indian_rest_mel.loc[indian_rest_mel['Clus_km'] == 2, indian_rest_mel.columns[[0] + list(range(1, indian_rest_mel.shape[1]))]]

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng,Clus_km
89,South Wharf,4b307875f964a520c0f924e3,Flora Indian Restaurant,21,-37.817592,144.966626,2


From the above clustering, we can deduce that "Red Pepper" Restaurant has most number of likes on FourSquares and it must serve pretty good Indian Food and has large satisfied customers.

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX