# Downloading the dependencies

In [119]:
# for web scraping
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import platform

try:
    import scrapy
except:
    !conda install -c conda-forge scrapy --yes
    import scrapy
    
from scrapy.crawler import CrawlerProcess
import scrapy.crawler as crawler
from multiprocessing import Process, Queue
from twisted.internet import reactor

# for downloading file to notebook
import os

# library to handle data in a vectorized manner
import numpy as np

# library for data analsysis
import pandas as pd

# for working on json
import json

# convert an address into latitude and longitude values
try:
    from geopy.geocoders import Nominatim
except:
    !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
    from geopy.geocoders import Nominatim

# library to handle requests
import requests

# tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# map rendering library
!conda install -c conda-forge folium=0.5.0 --yes
import folium

#importing sys
import sys

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


# Download and Explore Dataset

There are around 400+ suburbs around Melbourne. There is no easy source available online, so I am "Web Scraping" the list of suburbs from wikipedia. The list is spread in following two pages which can be obtained by page scraping -  
Source – 
a) https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Melbourne&pageuntil=Keilor%2C+Victoria%0AKeilor%2C+Victoria#mw-pages

b) https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Melbourne&pagefrom=Keilor%2C+Victoria%0AKeilor%2C+Victoria#mw-pages

This information has to be refined to be used later on.


## Web Scraping using Scrapy

Scrapy is a fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages. It can be used for a wide range of purposes, from data mining to monitoring and automated testing.

You can get more information from - https://docs.scrapy.org/en/latest/

This class creates a simple pipeline that writes all found items to a JSON file, where each line contains one JSON element.

In [120]:
class JsonWriterPipeline(object):

    def open_spider(self, spider):
        self.file = open('Mel_Subs.jl', 'w')

    def close_spider(self, spider):
        self.file.close()

    def process_item(self, item, spider):
        line = json.dumps(dict(item)) + "\n"
        self.file.write(line)
        return item

## Define the spider
The QuotesSpider class defines from which URLs to start crawling and which values to retrieve. I set the logging level of the crawler to warning, otherwise the notebook is overloaded with DEBUG messages about the retrieved data.

In [121]:
import logging

class QuotesSpider(scrapy.Spider):
    name = "quotes"
    start_urls = [
        'https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Melbourne&pageuntil=Keilor%2C+Victoria%0AKeilor%2C+Victoria#mw-pages',
        'https://en.wikipedia.org/w/index.php?title=Category:Suburbs_of_Melbourne&pagefrom=Keilor%2C+Victoria%0AKeilor%2C+Victoria#mw-pages',
    ]
    
    # Delete the JSON / JL file if exists - useful when run multiple times
    filePath='Mel_Subs.json'
    if os.path.exists(filePath):
        os.remove(filePath)
        print('Deleted "Mel_Subs.json"')
    else:
        print("Can not delete the file as it doesn't exists")
        
    filePath='Mel_Subs.jl'
    if os.path.exists(filePath):
        os.remove(filePath)
        print('Deleted "Mel_Subs.jl"')
    else:
        print("Can not delete the file as it doesn't exists")
    
    custom_settings = {
        'LOG_LEVEL': logging.WARNING,
        'ITEM_PIPELINES': {'__main__.JsonWriterPipeline': 1}, # Used for pipeline 1
        'FEED_FORMAT':'json',                                 # Used for pipeline 2
        'FEED_URI': 'Mel_Subs.json'                        # Used for pipeline 2
    }
    
    def parse(self, response):
        #for quote in response.css('div.quote'):
        for subs in response.xpath("//div[@id='mw-pages']//div//ul[1]//li"):
            yield {
                #'text': quote.css('span.text::text').extract_first(),
                'name': subs.css('li a::text').extract_first(),
            }

Deleted "Mel_Subs.json"
Deleted "Mel_Subs.jl"


In [122]:
# the wrapper to make it run more times
def run_spider(spider):
    def f(q):
        try:
            runner = crawler.CrawlerRunner()
            #process = CrawlerProcess({
            #    'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
            #})
            deferred = runner.crawl(spider)
            #deferred = process.crawl(spider)
            deferred.addBoth(lambda _: reactor.stop())
            reactor.run()
            q.put(None)
        except Exception as e:
            q.put(e)

    q = Queue()
    p = Process(target=f, args=(q,))
    p.start()
    result = q.get()
    p.join()

    if result is not None:
        raise result

## Start the crawler

In [123]:
run_spider(QuotesSpider)

  exporter = cls(crawler)


## Check the files
Verify that the files has been created on disk. As we can observe the files are both created and have data. The .jl file has line separated JSON elements, while the .json file has one big JSON array containing all the quotes.

In [124]:
ll Mel_Subs.*

-rw-r--r-- 1 jupyterlab 13214 Jul  2 21:35 Mel_Subs.jl
-rw-r--r-- 1 jupyterlab 13613 Jul  2 21:35 Mel_Subs.json


In [125]:
!tail -n 2 Mel_Subs.jl

{"name": "Keilor North, Victoria"}
{"name": "Keilor Park, Victoria"}


In [126]:
!tail -n 2 Mel_Subs.json

{"name": "Keilor Park, Victoria"}
]

## Create dataframes
Pandas can now be used to create dataframes and save the frames to pickles. The .sjon file can be loaded directly into a frame, whereas for the .jl file we need to specify the JSON objects are divided per line. 

NOTE - There is an issue in loading .json file to dataframe as it has multiple json objects in unexpected format.

In [127]:
#dfjson = pd.read_json('Mel_Subs.json', lines=True)
#dfjson


Hence .jl file is used to load the dataframe which has the web scraped list, one in each line

In [128]:

dfjl= pd.read_json('Mel_Subs.jl', lines=True)
dfjl.columns=['suburb']
dfjl

Unnamed: 0,suburb
0,List of Melbourne suburbs
1,"Abbotsford, Victoria"
2,"Aberfeldie, Victoria"
3,"Aintree, Victoria"
4,"Airport West, Victoria"
...,...
392,"Yallambie, Victoria"
393,"Yarraville, Victoria"
394,"Keilor Lodge, Victoria"
395,"Keilor North, Victoria"


# Data Manuipulation for the ease of using

dfjl has a row header = "List of Melbourne suburbs" as one of the value, which must be deleted / dropped

In [129]:
#Searching for the row in the dataframe
dfjl[dfjl['suburb'].str.contains("List")]

Unnamed: 0,suburb
0,List of Melbourne suburbs


In [130]:
#Searching for the row in the dataframe
delete_row = dfjl[dfjl['suburb'].str.contains("List")].index

#Deleting the the row and resetting index
df_subs = dfjl.drop(delete_row).reset_index(drop=True)
df_subs

Unnamed: 0,suburb
0,"Abbotsford, Victoria"
1,"Aberfeldie, Victoria"
2,"Aintree, Victoria"
3,"Airport West, Victoria"
4,"Albanvale, Victoria"
...,...
391,"Yallambie, Victoria"
392,"Yarraville, Victoria"
393,"Keilor Lodge, Victoria"
394,"Keilor North, Victoria"


There need not be "Victoria", in every row, which can be removed

In [131]:
# dropping null value columns to avoid errors 
df_subs.dropna(inplace = True)

# new data frame with split value columns 
new = df_subs['suburb'].str.split(",", n = 1, expand = True)
df_subs['suburb'] = new[0]

# dropping null value columns to avoid errors 
df_subs.dropna(inplace = True)
df_subs

Unnamed: 0,suburb
0,Abbotsford
1,Aberfeldie
2,Aintree
3,Airport West
4,Albanvale
...,...
391,Yallambie
392,Yarraville
393,Keilor Lodge
394,Keilor North


# Logitude & Latitudes for Mapping

We require the information of latitude and logitide values of all the suburbs to map them effectively. This information can be collected using - geocoder. But since it is unstable most of the times. It can be downloaded from the available csv file which has all the Australia Suburbs Information - Postal Code,State, Name of Suburb, Latitude, Longitude.

In [132]:
df_aus = pd.read_csv('Aus-Postcodes.csv',skipinitialspace=True)
df_aus

Unnamed: 0,postcode,suburb,state,latitude,longitude
0,200,Australian National University,ACT,-35.28,149.12
1,221,Barton,ACT,-35.20,149.1
2,800,Darwin,NT,-12.80,130.96
3,801,Darwin,NT,-12.80,130.96
4,804,Parap,NT,-12.43,130.84
...,...,...,...,...,...
16737,9023,Brisbane GPO Boxes,QLD,0.00,0
16738,9464,Northgate MC,QLD,0.00,0
16739,9726,Gold Coast MC,QLD,0.00,0
16740,9728,Gold Coast MC,QLD,0.00,0


We only require the State of Victoria's details. Its state code is 'VIC'. Hence I am going to filter out the 'VIC' state's data from the available large list of Australian Suburbs. Even the reset of index is done.

In [133]:
df_vic = df_aus[df_aus['state']=='VIC'].reset_index(drop=True)
df_vic

Unnamed: 0,postcode,suburb,state,latitude,longitude
0,3000,Melbourne,VIC,-37.81,144.97
1,3001,Melbourne,VIC,-38.37,144.77
2,3002,East Melbourne,VIC,-37.82,144.99
3,3003,West Melbourne,VIC,-37.81,144.94
4,3004,Melbourne,VIC,-37.84,144.98
...,...,...,...,...,...
3228,8396,Melbourne,VIC,-38.37,144.77
3229,8399,Melbourne,VIC,-38.37,144.77
3230,8576,Ivanhoe,VIC,-37.76,145.04
3231,8627,Camberwell,VIC,-37.84,145.06


We are not going to use postcode and state information neither in calculation nor in mapping. Hence we can drop those columns.

In [134]:
columns = ['suburb','latitude','longitude']
df_vic = df_vic[columns]
df_vic

Unnamed: 0,suburb,latitude,longitude
0,Melbourne,-37.81,144.97
1,Melbourne,-38.37,144.77
2,East Melbourne,-37.82,144.99
3,West Melbourne,-37.81,144.94
4,Melbourne,-37.84,144.98
...,...,...,...
3228,Melbourne,-38.37,144.77
3229,Melbourne,-38.37,144.77
3230,Ivanhoe,-37.76,145.04
3231,Camberwell,-37.84,145.06


If we observe the rows in df_vic clearly, we can find multiple fields for "Melbourne" suburb. which is not required. And if if those records are not dropped would distort the map.

In [135]:
#df_vic[df_vic['suburb']=='Melbourne']

In [136]:
#Dropping all the other rows with suburb as "Melbourne" other than at 0th index with latitide = '-37.81' and resetting the index
#df_vic = df_vic.drop(df_vic[(df_vic['suburb']=='Melbourne') & (df_vic['latitude']!=-37.81)].index).reset_index(drop=True)
#df_vic

In [137]:
# saving this to use later-on
df_temp = df_vic[(df_vic['suburb']=='Melbourne') & (df_vic['latitude']==-37.81)]
df_temp

Unnamed: 0,suburb,latitude,longitude
0,Melbourne,-37.81,144.97


Now that we have two data frames - 1. List of Melbourne Suburbs[df_subs] & 2. List of all suburbs in Victoria State with latitide and logitude data[df_vic]. We can merge both the dataframes and get the required information of MELBOURNE SUBURBS.

In [138]:
#Merge the df_subs with df_vic on 'suburb'
df_Mel_Subs = pd.merge(df_subs,df_vic,on=['suburb'])
df_Mel_Subs.dropna(inplace = True)
df_Mel_Subs

Unnamed: 0,suburb,latitude,longitude
0,Abbotsford,-37.80,145
1,Aberfeldie,-37.76,144.9
2,Airport West,-37.71,144.89
3,Albanvale,-37.75,144.77
4,Albert Park,-37.84,144.96
...,...,...,...
374,Yallambie,-37.73,145.07
375,Yarraville,-37.82,144.89
376,Keilor Lodge,-37.73,144.81
377,Keilor North,-37.72,144.83


There might be few suburbs whose latitude and longitude information is populated as 0.0 or 0. This can be fetched by

In [139]:
df_Mel_Subs[(df_Mel_Subs['latitude']==0) | (df_Mel_Subs['longitude']==0)]

Unnamed: 0,suburb,latitude,longitude
224,Docklands,0.0,0


This row can be dropped

In [140]:
df_Mel_Subs = df_Mel_Subs.drop(df_Mel_Subs[(df_Mel_Subs['latitude']==0) | (df_Mel_Subs['longitude']==0)].index).reset_index(drop=True)
df_Mel_Subs

Unnamed: 0,suburb,latitude,longitude
0,Abbotsford,-37.80,145
1,Aberfeldie,-37.76,144.9
2,Airport West,-37.71,144.89
3,Albanvale,-37.75,144.77
4,Albert Park,-37.84,144.96
...,...,...,...
373,Yallambie,-37.73,145.07
374,Yarraville,-37.82,144.89
375,Keilor Lodge,-37.73,144.81
376,Keilor North,-37.72,144.83


#### By carefully oberving the list I found out that this particular dataframe does not contain - "Melbourne CBD" / "Melbourne" or normally called as CBD. We can add it manually.

In [141]:
df_temp

Unnamed: 0,suburb,latitude,longitude
0,Melbourne,-37.81,144.97


In [142]:
df_Mel_Subs = df_Mel_Subs.append(df_temp, ignore_index = True)
df_Mel_Subs

Unnamed: 0,suburb,latitude,longitude
0,Abbotsford,-37.80,145
1,Aberfeldie,-37.76,144.9
2,Airport West,-37.71,144.89
3,Albanvale,-37.75,144.77
4,Albert Park,-37.84,144.96
...,...,...,...
374,Yarraville,-37.82,144.89
375,Keilor Lodge,-37.73,144.81
376,Keilor North,-37.72,144.83
377,Keilor Park,-37.71,144.89


In [143]:
df_Mel_Subs.dtypes

suburb        object
latitude     float64
longitude     object
dtype: object

In [144]:
df_Mel_Subs['longitude'] = df_Mel_Subs['longitude'].astype(float)
df_Mel_Subs['latitude'] = df_Mel_Subs['latitude'].astype(float)
df_Mel_Subs.dtypes

suburb        object
latitude     float64
longitude    float64
dtype: object

### Use geopy library to get the latitude and longitude values of Melbourne City.

In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent <em>ny_explorer</em>, as shown below.

In [145]:
address = 'Melbourne, Australia'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Melbourne, Australia are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Melbourne, Australia are -37.8142176, 144.9631608.


### Create a map of Melbourne with its suburbs superimposed on top.

In [146]:
# create map of Melbourne using latitude and longitude values
map_melb = folium.Map(location=[latitude, longitude], zoom_start=9)

# add markers to map
for lat, lng, suburb in zip(df_Mel_Subs['latitude'], df_Mel_Subs['longitude'], df_Mel_Subs['suburb']):
    label = '{}, {}'.format(suburb,'VIC')
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_melb)

<folium.features.CircleMarker at 0x7f97ff53bba8>

<folium.features.CircleMarker at 0x7f97ff516b70>

<folium.features.CircleMarker at 0x7f97ff5167b8>

<folium.features.CircleMarker at 0x7f97ff516fd0>

<folium.features.CircleMarker at 0x7f97ff5162e8>

<folium.features.CircleMarker at 0x7f97ff516898>

<folium.features.CircleMarker at 0x7f97ff516d68>

<folium.features.CircleMarker at 0x7f97ff5165f8>

<folium.features.CircleMarker at 0x7f97ffd6e208>

<folium.features.CircleMarker at 0x7f97ff516630>

<folium.features.CircleMarker at 0x7f97ff4f7ac8>

<folium.features.CircleMarker at 0x7f97ff4f76a0>

<folium.features.CircleMarker at 0x7f97ff4f7a58>

<folium.features.CircleMarker at 0x7f97ff516ac8>

<folium.features.CircleMarker at 0x7f97ff4f7470>

<folium.features.CircleMarker at 0x7f97ff510a90>

<folium.features.CircleMarker at 0x7f97ff510c88>

<folium.features.CircleMarker at 0x7f97ff510748>

<folium.features.CircleMarker at 0x7f97ff510518>

<folium.features.CircleMarker at 0x7f97ff510b38>

<folium.features.CircleMarker at 0x7f97ff5105c0>

<folium.features.CircleMarker at 0x7f97ff5160b8>

<folium.features.CircleMarker at 0x7f97ff510898>

<folium.features.CircleMarker at 0x7f97ff4da630>

<folium.features.CircleMarker at 0x7f97ff4dac88>

<folium.features.CircleMarker at 0x7f97ff4da1d0>

<folium.features.CircleMarker at 0x7f97ff4dac18>

<folium.features.CircleMarker at 0x7f97ff4dabe0>

<folium.features.CircleMarker at 0x7f97ff4b5c50>

<folium.features.CircleMarker at 0x7f97ff510550>

<folium.features.CircleMarker at 0x7f97ff510ac8>

<folium.features.CircleMarker at 0x7f97ff4b5b70>

<folium.features.CircleMarker at 0x7f97ff4b5eb8>

<folium.features.CircleMarker at 0x7f97ff4b53c8>

<folium.features.CircleMarker at 0x7f97ff4b50f0>

<folium.features.CircleMarker at 0x7f97ff4bcb38>

<folium.features.CircleMarker at 0x7f97ff4bcc88>

<folium.features.CircleMarker at 0x7f97ff4daba8>

<folium.features.CircleMarker at 0x7f97ff4b5d30>

<folium.features.CircleMarker at 0x7f97ff4bc9b0>

<folium.features.CircleMarker at 0x7f97ff4bc198>

<folium.features.CircleMarker at 0x7f97ff4bc8d0>

<folium.features.CircleMarker at 0x7f97ff4bc320>

<folium.features.CircleMarker at 0x7f97ff4bac50>

<folium.features.CircleMarker at 0x7f97ff4ba470>

<folium.features.CircleMarker at 0x7f97ff4b58d0>

<folium.features.CircleMarker at 0x7f97ff4b5978>

<folium.features.CircleMarker at 0x7f97ff4ba630>

<folium.features.CircleMarker at 0x7f97ff4baf98>

<folium.features.CircleMarker at 0x7f97ff4ba6a0>

<folium.features.CircleMarker at 0x7f97ff4683c8>

<folium.features.CircleMarker at 0x7f97ff4680b8>

<folium.features.CircleMarker at 0x7f97ff468fd0>

<folium.features.CircleMarker at 0x7f97ff4bcb70>

<folium.features.CircleMarker at 0x7f97ff4ba898>

<folium.features.CircleMarker at 0x7f97ff468710>

<folium.features.CircleMarker at 0x7f97ff468e48>

<folium.features.CircleMarker at 0x7f97ff468400>

<folium.features.CircleMarker at 0x7f97ff486c88>

<folium.features.CircleMarker at 0x7f97ff4866d8>

<folium.features.CircleMarker at 0x7f97ff4869e8>

<folium.features.CircleMarker at 0x7f97ff4ba550>

<folium.features.CircleMarker at 0x7f97ff486fd0>

<folium.features.CircleMarker at 0x7f97ff486940>

<folium.features.CircleMarker at 0x7f97ff4861d0>

<folium.features.CircleMarker at 0x7f97ff485390>

<folium.features.CircleMarker at 0x7f97ff4852e8>

<folium.features.CircleMarker at 0x7f97ff485b38>

<folium.features.CircleMarker at 0x7f97ff485c50>

<folium.features.CircleMarker at 0x7f97ff485cf8>

<folium.features.CircleMarker at 0x7f97ff485630>

<folium.features.CircleMarker at 0x7f97ff485978>

<folium.features.CircleMarker at 0x7f97ff42aa90>

<folium.features.CircleMarker at 0x7f97ff42ad30>

<folium.features.CircleMarker at 0x7f97ff42a5f8>

<folium.features.CircleMarker at 0x7f97ff42a320>

<folium.features.CircleMarker at 0x7f97ff485128>

<folium.features.CircleMarker at 0x7f97ff42a3c8>

<folium.features.CircleMarker at 0x7f97ff42a4a8>

<folium.features.CircleMarker at 0x7f97ff42a2b0>

<folium.features.CircleMarker at 0x7f97ff419c50>

<folium.features.CircleMarker at 0x7f97ff419c88>

<folium.features.CircleMarker at 0x7f97ff419a58>

<folium.features.CircleMarker at 0x7f97ff419588>

<folium.features.CircleMarker at 0x7f97ff419b70>

<folium.features.CircleMarker at 0x7f97ff4197f0>

<folium.features.CircleMarker at 0x7f97ff419ef0>

<folium.features.CircleMarker at 0x7f97ff3e86d8>

<folium.features.CircleMarker at 0x7f97ff3e8c50>

<folium.features.CircleMarker at 0x7f97ff3e8dd8>

<folium.features.CircleMarker at 0x7f97ff3e8da0>

<folium.features.CircleMarker at 0x7f97ff419828>

<folium.features.CircleMarker at 0x7f97ff3e8ac8>

<folium.features.CircleMarker at 0x7f97ff3e82e8>

<folium.features.CircleMarker at 0x7f97ff442b00>

<folium.features.CircleMarker at 0x7f97ff442cf8>

<folium.features.CircleMarker at 0x7f97ff442be0>

<folium.features.CircleMarker at 0x7f97ff442780>

<folium.features.CircleMarker at 0x7f97ff442978>

<folium.features.CircleMarker at 0x7f97ff442748>

<folium.features.CircleMarker at 0x7f97ff442048>

<folium.features.CircleMarker at 0x7f97ff3e0a90>

<folium.features.CircleMarker at 0x7f97ff3e0cc0>

<folium.features.CircleMarker at 0x7f97ff3e0438>

<folium.features.CircleMarker at 0x7f97ff3e0e48>

<folium.features.CircleMarker at 0x7f97ff3e05f8>

<folium.features.CircleMarker at 0x7f97ff3e8630>

<folium.features.CircleMarker at 0x7f97ff3e0be0>

<folium.features.CircleMarker at 0x7f97ff3e02e8>

<folium.features.CircleMarker at 0x7f97ff3f2e10>

<folium.features.CircleMarker at 0x7f97ff3f21d0>

<folium.features.CircleMarker at 0x7f97ff40b9b0>

<folium.features.CircleMarker at 0x7f97ff40bfd0>

<folium.features.CircleMarker at 0x7f97ff40bf60>

<folium.features.CircleMarker at 0x7f97ff4424a8>

<folium.features.CircleMarker at 0x7f97ff40b940>

<folium.features.CircleMarker at 0x7f97ff40b7f0>

<folium.features.CircleMarker at 0x7f97ff3f2908>

<folium.features.CircleMarker at 0x7f97ff3f2080>

<folium.features.CircleMarker at 0x7f97ff3f29e8>

<folium.features.CircleMarker at 0x7f97ff3f53c8>

<folium.features.CircleMarker at 0x7f97ff3e0240>

<folium.features.CircleMarker at 0x7f97ff3f2320>

<folium.features.CircleMarker at 0x7f97ff3f5908>

<folium.features.CircleMarker at 0x7f97ff3f5048>

<folium.features.CircleMarker at 0x7f97ff3f5d68>

<folium.features.CircleMarker at 0x7f97ff3fb2e8>

<folium.features.CircleMarker at 0x7f97ff3fb240>

<folium.features.CircleMarker at 0x7f97ff3a3630>

<folium.features.CircleMarker at 0x7f97ff3fb6a0>

<folium.features.CircleMarker at 0x7f97ff3f5828>

<folium.features.CircleMarker at 0x7f97ff3f5400>

<folium.features.CircleMarker at 0x7f97ff3fb3c8>

<folium.features.CircleMarker at 0x7f97ff3fb5c0>

<folium.features.CircleMarker at 0x7f97ff3a39e8>

<folium.features.CircleMarker at 0x7f97ff3a30b8>

<folium.features.CircleMarker at 0x7f97ff3fbba8>

<folium.features.CircleMarker at 0x7f97ff3a3a20>

<folium.features.CircleMarker at 0x7f97ff3a3160>

<folium.features.CircleMarker at 0x7f97ff3c4208>

<folium.features.CircleMarker at 0x7f97ff3c4588>

<folium.features.CircleMarker at 0x7f97ff3c4c18>

<folium.features.CircleMarker at 0x7f97ff3c4be0>

<folium.features.CircleMarker at 0x7f97ff3d5470>

<folium.features.CircleMarker at 0x7f97ff40b668>

<folium.features.CircleMarker at 0x7f97ff3d5128>

<folium.features.CircleMarker at 0x7f97ff3d5c18>

<folium.features.CircleMarker at 0x7f97ff3d5550>

<folium.features.CircleMarker at 0x7f97ff3d5400>

<folium.features.CircleMarker at 0x7f97ff3c4898>

<folium.features.CircleMarker at 0x7f97ff3a0da0>

<folium.features.CircleMarker at 0x7f97ff3c4048>

<folium.features.CircleMarker at 0x7f97ff3d5f28>

<folium.features.CircleMarker at 0x7f97ff3a0a58>

<folium.features.CircleMarker at 0x7f97ff3a0cc0>

<folium.features.CircleMarker at 0x7f97ff3a0550>

<folium.features.CircleMarker at 0x7f97ff3a01d0>

<folium.features.CircleMarker at 0x7f97ff35d748>

<folium.features.CircleMarker at 0x7f97ff37d668>

<folium.features.CircleMarker at 0x7f97ff37d2e8>

<folium.features.CircleMarker at 0x7f97ff3a3e48>

<folium.features.CircleMarker at 0x7f97ff3a0fd0>

<folium.features.CircleMarker at 0x7f97ff3a0b38>

<folium.features.CircleMarker at 0x7f97ff37db00>

<folium.features.CircleMarker at 0x7f97ff35d390>

<folium.features.CircleMarker at 0x7f97ff386860>

<folium.features.CircleMarker at 0x7f97ff3a09e8>

<folium.features.CircleMarker at 0x7f97ff35d198>

<folium.features.CircleMarker at 0x7f97ff35d550>

<folium.features.CircleMarker at 0x7f97ff37d048>

<folium.features.CircleMarker at 0x7f97ff386d68>

<folium.features.CircleMarker at 0x7f97ff3864a8>

<folium.features.CircleMarker at 0x7f97ff386e48>

<folium.features.CircleMarker at 0x7f97ff3a05c0>

<folium.features.CircleMarker at 0x7f97ff386898>

<folium.features.CircleMarker at 0x7f97ff386a90>

<folium.features.CircleMarker at 0x7f97ff384e48>

<folium.features.CircleMarker at 0x7f97ff384860>

<folium.features.CircleMarker at 0x7f97ff384b70>

<folium.features.CircleMarker at 0x7f97ff384f28>

<folium.features.CircleMarker at 0x7f97ff31ee48>

<folium.features.CircleMarker at 0x7f97ff35dc88>

<folium.features.CircleMarker at 0x7f97ff31edd8>

<folium.features.CircleMarker at 0x7f97ff384c88>

<folium.features.CircleMarker at 0x7f97ff384a90>

<folium.features.CircleMarker at 0x7f97ff384278>

<folium.features.CircleMarker at 0x7f97ff31e1d0>

<folium.features.CircleMarker at 0x7f97ff351668>

<folium.features.CircleMarker at 0x7f97ff31eac8>

<folium.features.CircleMarker at 0x7f97ff31e390>

<folium.features.CircleMarker at 0x7f97ff351160>

<folium.features.CircleMarker at 0x7f97ff351ac8>

<folium.features.CircleMarker at 0x7f97ff351940>

<folium.features.CircleMarker at 0x7f97ff326208>

<folium.features.CircleMarker at 0x7f97ff326908>

<folium.features.CircleMarker at 0x7f97ff32c518>

<folium.features.CircleMarker at 0x7f97ff3510b8>

<folium.features.CircleMarker at 0x7f97ff31e5f8>

<folium.features.CircleMarker at 0x7f97ff32ce10>

<folium.features.CircleMarker at 0x7f97ff32c978>

<folium.features.CircleMarker at 0x7f97ff326550>

<folium.features.CircleMarker at 0x7f97ff3265c0>

<folium.features.CircleMarker at 0x7f97ff3267f0>

<folium.features.CircleMarker at 0x7f97ff31ed68>

<folium.features.CircleMarker at 0x7f97ff326ac8>

<folium.features.CircleMarker at 0x7f97ff3264a8>

<folium.features.CircleMarker at 0x7f97ff2ddb70>

<folium.features.CircleMarker at 0x7f97ff2dd860>

<folium.features.CircleMarker at 0x7f97ff2ddd30>

<folium.features.CircleMarker at 0x7f97ff2ddd68>

<folium.features.CircleMarker at 0x7f97ff2efdd8>

<folium.features.CircleMarker at 0x7f97ff351d68>

<folium.features.CircleMarker at 0x7f97ff2ef9e8>

<folium.features.CircleMarker at 0x7f97ff2ef4e0>

<folium.features.CircleMarker at 0x7f97ff2ef6d8>

<folium.features.CircleMarker at 0x7f97ff2ef828>

<folium.features.CircleMarker at 0x7f97ff302828>

<folium.features.CircleMarker at 0x7f97ff302c18>

<folium.features.CircleMarker at 0x7f97ff326d68>

<folium.features.CircleMarker at 0x7f97ff2dd080>

<folium.features.CircleMarker at 0x7f97ff302e48>

<folium.features.CircleMarker at 0x7f97ff302cc0>

<folium.features.CircleMarker at 0x7f97ff302978>

<folium.features.CircleMarker at 0x7f97ff2e0438>

<folium.features.CircleMarker at 0x7f97ff2e0b70>

<folium.features.CircleMarker at 0x7f97ff2b3898>

<folium.features.CircleMarker at 0x7f97ff302160>

<folium.features.CircleMarker at 0x7f97ff2ef978>

<folium.features.CircleMarker at 0x7f97ff2e0780>

<folium.features.CircleMarker at 0x7f97ff2e0cf8>

<folium.features.CircleMarker at 0x7f97ff2b3e48>

<folium.features.CircleMarker at 0x7f97ff2b3278>

<folium.features.CircleMarker at 0x7f97ff2b31d0>

<folium.features.CircleMarker at 0x7f97ff302358>

<folium.features.CircleMarker at 0x7f97ff2e0b00>

<folium.features.CircleMarker at 0x7f97ff2b35f8>

<folium.features.CircleMarker at 0x7f97ff2abf60>

<folium.features.CircleMarker at 0x7f97ff2ab4e0>

<folium.features.CircleMarker at 0x7f97ff2ab358>

<folium.features.CircleMarker at 0x7f97ff2ab5c0>

<folium.features.CircleMarker at 0x7f97ff2d6e10>

<folium.features.CircleMarker at 0x7f97ff2e0470>

<folium.features.CircleMarker at 0x7f97ff2d6518>

<folium.features.CircleMarker at 0x7f97ff2d6fd0>

<folium.features.CircleMarker at 0x7f97ff2d67b8>

<folium.features.CircleMarker at 0x7f97ff2d6cc0>

<folium.features.CircleMarker at 0x7f97ff2b4780>

<folium.features.CircleMarker at 0x7f97ff2b41d0>

<folium.features.CircleMarker at 0x7f97ff2d6d30>

<folium.features.CircleMarker at 0x7f97ff2d6b00>

<folium.features.CircleMarker at 0x7f97ff2b45c0>

<folium.features.CircleMarker at 0x7f97ff2b4ef0>

<folium.features.CircleMarker at 0x7f97ff2b47b8>

<folium.features.CircleMarker at 0x7f97ff264240>

<folium.features.CircleMarker at 0x7f97ff264940>

<folium.features.CircleMarker at 0x7f97ff272898>

<folium.features.CircleMarker at 0x7f97ff2e0cc0>

<folium.features.CircleMarker at 0x7f97ff2643c8>

<folium.features.CircleMarker at 0x7f97ff264be0>

<folium.features.CircleMarker at 0x7f97ff264470>

<folium.features.CircleMarker at 0x7f97ff272dd8>

<folium.features.CircleMarker at 0x7f97ff272e80>

<folium.features.CircleMarker at 0x7f97ff272438>

<folium.features.CircleMarker at 0x7f97ff2b49b0>

<folium.features.CircleMarker at 0x7f97ff290780>

<folium.features.CircleMarker at 0x7f97ff290978>

<folium.features.CircleMarker at 0x7f97ff2901d0>

<folium.features.CircleMarker at 0x7f97ff290860>

<folium.features.CircleMarker at 0x7f97ff272cf8>

<folium.features.CircleMarker at 0x7f97ff290a58>

<folium.features.CircleMarker at 0x7f97ff28c470>

<folium.features.CircleMarker at 0x7f97ff264b00>

<folium.features.CircleMarker at 0x7f97ff28c400>

<folium.features.CircleMarker at 0x7f97ff290668>

<folium.features.CircleMarker at 0x7f97ff290748>

<folium.features.CircleMarker at 0x7f97ff28c550>

<folium.features.CircleMarker at 0x7f97ff290b00>

<folium.features.CircleMarker at 0x7f97ff2233c8>

<folium.features.CircleMarker at 0x7f97ff28cba8>

<folium.features.CircleMarker at 0x7f97ff28c630>

<folium.features.CircleMarker at 0x7f97ff223e10>

<folium.features.CircleMarker at 0x7f97ff223780>

<folium.features.CircleMarker at 0x7f97ff2232b0>

<folium.features.CircleMarker at 0x7f97ff232ac8>

<folium.features.CircleMarker at 0x7f97ff2322b0>

<folium.features.CircleMarker at 0x7f97ff254940>

<folium.features.CircleMarker at 0x7f97ff223240>

<folium.features.CircleMarker at 0x7f97ff28c780>

<folium.features.CircleMarker at 0x7f97ff254358>

<folium.features.CircleMarker at 0x7f97ff254550>

<folium.features.CircleMarker at 0x7f97ff2326a0>

<folium.features.CircleMarker at 0x7f97ff232908>

<folium.features.CircleMarker at 0x7f97ff232898>

<folium.features.CircleMarker at 0x7f97ff264860>

<folium.features.CircleMarker at 0x7f97ff254438>

<folium.features.CircleMarker at 0x7f97ff2547f0>

<folium.features.CircleMarker at 0x7f97ff21e0f0>

<folium.features.CircleMarker at 0x7f97ff21e400>

<folium.features.CircleMarker at 0x7f97ff21ed30>

<folium.features.CircleMarker at 0x7f97ff21e908>

<folium.features.CircleMarker at 0x7f97ff1ef390>

<folium.features.CircleMarker at 0x7f97ff232a58>

<folium.features.CircleMarker at 0x7f97ff1ef7f0>

<folium.features.CircleMarker at 0x7f97ff1ef6d8>

<folium.features.CircleMarker at 0x7f97ff1ef470>

<folium.features.CircleMarker at 0x7f97ff1ef668>

<folium.features.CircleMarker at 0x7f97ff1e5ef0>

<folium.features.CircleMarker at 0x7f97ff1e5128>

<folium.features.CircleMarker at 0x7f97ff1ef828>

<folium.features.CircleMarker at 0x7f97ff1e5d68>

<folium.features.CircleMarker at 0x7f97ff1efdd8>

<folium.features.CircleMarker at 0x7f97ff1e5400>

<folium.features.CircleMarker at 0x7f97ff1e5668>

<folium.features.CircleMarker at 0x7f97ff1f3898>

<folium.features.CircleMarker at 0x7f97ff1db9e8>

<folium.features.CircleMarker at 0x7f97ff1f33c8>

<folium.features.CircleMarker at 0x7f97ff1e53c8>

<folium.features.CircleMarker at 0x7f97ff1f3a58>

<folium.features.CircleMarker at 0x7f97ff1e56a0>

<folium.features.CircleMarker at 0x7f97ff1e5fd0>

<folium.features.CircleMarker at 0x7f97ff1db780>

<folium.features.CircleMarker at 0x7f97ff1db320>

<folium.features.CircleMarker at 0x7f97ff19c940>

<folium.features.CircleMarker at 0x7f97ff1f3438>

<folium.features.CircleMarker at 0x7f97ff1db5f8>

<folium.features.CircleMarker at 0x7f97ff1db7b8>

<folium.features.CircleMarker at 0x7f97ff19c8d0>

<folium.features.CircleMarker at 0x7f97ff19c550>

<folium.features.CircleMarker at 0x7f97ff19ce10>

<folium.features.CircleMarker at 0x7f97ff19c438>

<folium.features.CircleMarker at 0x7f97ff1bbf98>

<folium.features.CircleMarker at 0x7f97ff1db0f0>

<folium.features.CircleMarker at 0x7f97ff1bbc88>

<folium.features.CircleMarker at 0x7f97ff19cf28>

<folium.features.CircleMarker at 0x7f97ff19c5f8>

<folium.features.CircleMarker at 0x7f97ff19c0f0>

<folium.features.CircleMarker at 0x7f97ff1a14e0>

<folium.features.CircleMarker at 0x7f97ff1a1c50>

<folium.features.CircleMarker at 0x7f97ff1bba58>

<folium.features.CircleMarker at 0x7f97ff1bb940>

<folium.features.CircleMarker at 0x7f97ff1a1b70>

<folium.features.CircleMarker at 0x7f97ff1a1d30>

<folium.features.CircleMarker at 0x7f97ff1cfeb8>

<folium.features.CircleMarker at 0x7f97ff1cf3c8>

<folium.features.CircleMarker at 0x7f97ff1cf4a8>

<folium.features.CircleMarker at 0x7f97ff170978>

<folium.features.CircleMarker at 0x7f97ff1a1748>

<folium.features.CircleMarker at 0x7f97ff1bb6d8>

<folium.features.CircleMarker at 0x7f97ff170588>

<folium.features.CircleMarker at 0x7f97ff1cf4e0>

<folium.features.CircleMarker at 0x7f97ff1cf828>

<folium.features.CircleMarker at 0x7f97ff1cfda0>

<folium.features.CircleMarker at 0x7f97ff1a13c8>

<folium.features.CircleMarker at 0x7f97ff1cf0f0>

<folium.features.CircleMarker at 0x7f97ff170780>

<folium.features.CircleMarker at 0x7f97ff1702e8>

<folium.features.CircleMarker at 0x7f97ff15f0f0>

<folium.features.CircleMarker at 0x7f97ff15ff28>

<folium.features.CircleMarker at 0x7f97ff1725c0>

<folium.features.CircleMarker at 0x7f97ff170630>

<folium.features.CircleMarker at 0x7f97ff15f208>

<folium.features.CircleMarker at 0x7f97ff15f4a8>

<folium.features.CircleMarker at 0x7f97ff15f320>

<folium.features.CircleMarker at 0x7f97ff172a20>

<folium.features.CircleMarker at 0x7f97ff169710>

<folium.features.CircleMarker at 0x7f97ff172e48>

<folium.features.CircleMarker at 0x7f97ff1697f0>

<folium.features.CircleMarker at 0x7f97ff15fa20>

<folium.features.CircleMarker at 0x7f97ff15f748>

<folium.features.CircleMarker at 0x7f97ff1722e8>

<folium.features.CircleMarker at 0x7f97ff169c18>

<folium.features.CircleMarker at 0x7f97ff169208>

<folium.features.CircleMarker at 0x7f98020e9908>

<folium.features.CircleMarker at 0x7f97ff11f2e8>

<folium.features.CircleMarker at 0x7f97ff11fd68>

<folium.features.CircleMarker at 0x7f97ff11f4e0>

<folium.features.CircleMarker at 0x7f984d15b908>

<folium.features.CircleMarker at 0x7f97ff1696d8>

<folium.features.CircleMarker at 0x7f97ff169588>

In [147]:
map_melb

# Fetching the list of Indian Restaurants in Melbourne CBD and render them on Map!!

Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

In [148]:
CLIENT_ID = 'J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ' # your Foursquare ID
CLIENT_SECRET = 'HTBEPGSNNYG2X0WW4ZUTDELC3DMLEVJBREI2WWTXQEL34GLK' # your Foursquare Secret
VERSION = '20180604'# Foursquare API version, Then try todays date

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ
CLIENT_SECRET:HTBEPGSNNYG2X0WW4ZUTDELC3DMLEVJBREI2WWTXQEL34GLK


#### Let's explore Melbourne CBD first, I believe there must be many Indian Restaurants in CBD!!

In [149]:
df_Mel_Subs[df_Mel_Subs['suburb']=='Melbourne']

Unnamed: 0,suburb,latitude,longitude
378,Melbourne,-37.81,144.97


In [150]:
search_query = 'Indian'
radius = 1000
LIMIT = 50
print(search_query + ' .... OK!')

Indian .... OK!


In [151]:
address = 'Melbourne, VIC'
#-37.5939889 145.0337333
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

-37.8142176 144.9631608


In [152]:
# latitude=-37.81
# longitude=144.97

In [153]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    latitude, 
    longitude, 
    VERSION, 
    search_query, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ&client_secret=HTBEPGSNNYG2X0WW4ZUTDELC3DMLEVJBREI2WWTXQEL34GLK&ll=-37.8142176,144.9631608&v=20180604&query=Indian&radius=1000&limit=50'

In [154]:
results = requests.get(url).json()
#results

In [155]:
results

{'meta': {'code': 200, 'requestId': '5efe540a36812b1176fcfbc3'},
 'response': {'venues': [{'id': '4b768359f964a520b84f2ee3',
    'name': 'Shiraaz Fine Indian Cuisine',
    'location': {'address': '22 William St',
     'crossStreet': 'at Flinders Ln',
     'lat': -37.818692,
     'lng': 144.95994,
     'labeledLatLngs': [{'label': 'display',
       'lat': -37.818692,
       'lng': 144.95994}],
     'distance': 572,
     'postalCode': '3000',
     'cc': 'AU',
     'city': 'Melbourne',
     'state': 'VIC',
     'country': 'Australia',
     'formattedAddress': ['22 William St (at Flinders Ln)',
      'Melbourne VIC 3000',
      'Australia']},
    'categories': [{'id': '4bf58dd8d48988d10f941735',
      'name': 'Indian Restaurant',
      'pluralName': 'Indian Restaurants',
      'shortName': 'Indian',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/indian_',
       'suffix': '.png'},
      'primary': True}],
    'referralId': 'v-1593725738',
    'hasPerk': False},
   {'

In [156]:
venue_data=results['response']['venues']
#venue_data

In [157]:
venue_details=[]
for row in venue_data:
    try:
        venue_id=row['id']
        venue_name=row['name']
        venue_lat=row['location']['lat']
        venue_lng=row['location']['lng']
        venue_category=row['categories'][0]['name']
        venue_details.append([venue_id,venue_name,venue_lat,venue_lng,venue_category])
    except KeyError:
        pass
        
column_names=['ID','Name','Lat','Lng','Category']
venues = pd.DataFrame(venue_details,columns=column_names) 
venues

Unnamed: 0,ID,Name,Lat,Lng,Category
0,4b768359f964a520b84f2ee3,Shiraaz Fine Indian Cuisine,-37.818692,144.95994,Indian Restaurant
1,5aa4d624b1538e4b65bf900d,Walkers Stop Convenience Indian Grocery,-37.814998,144.954822,Grocery Store
2,4ba44773f964a520799238e3,Drums Indian Cafe,-37.806458,144.958643,Indian Restaurant
3,4b05874df964a5205e8a22e3,Gaylord Indian Restaurant,-37.820263,144.954499,Indian Restaurant
4,4b05874ef964a520b28a22e3,Nirankar Indian Restaurant,-37.814383,144.960548,Indian Restaurant
5,5243b8f22fc65bb2d81f5182,Indian Mirror,-37.815488,144.96652,Bookstore
6,5b403404c9a5170039c289b4,Tejas Modern Indian,-37.815271,144.961671,Indian Restaurant
7,544dc387498ea3736c48cab7,indian visa embassy,-37.815986,144.967305,Embassy / Consulate
8,4dbe28db0437955ec05ee699,Indian Embassy @ Melbourne,-37.816382,144.967016,Embassy / Consulate
9,4e15208952b1b9e5643e0fd0,Indian Passport and Visa Services Centre,-37.816272,144.967,Embassy / Consulate


In [158]:
indian_resturants_melb=venues[venues['Category']=='Indian Restaurant'] 
#indian_resturants_melb

In [159]:
print("No of Indian Restaurants in Melbourne CBD is",str(len(indian_resturants_melb)))

No of Indian Restaurants in Melbourne CBD is 11


In [160]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13) # generate map centred around the Conrad Hotel

# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Melbourne',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Indian restaurants as blue circle markers
for lat, lng, label in zip(indian_resturants_melb.Lat, indian_resturants_melb.Lng, indian_resturants_melb.Name):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

<folium.features.CircleMarker at 0x7f97fae72630>

<folium.features.CircleMarker at 0x7f97fae72ac8>

<folium.features.CircleMarker at 0x7f97fae85f28>

<folium.features.CircleMarker at 0x7f97fae85860>

<folium.features.CircleMarker at 0x7f97fae85278>

<folium.features.CircleMarker at 0x7f97fae859b0>

<folium.features.CircleMarker at 0x7f97fae85c50>

<folium.features.CircleMarker at 0x7f97fae85b00>

<folium.features.CircleMarker at 0x7f97fae6d2e8>

<folium.features.CircleMarker at 0x7f97fae85dd8>

<folium.features.CircleMarker at 0x7f97fae6d940>

<folium.features.CircleMarker at 0x7f97fae85b38>

In [161]:
# display map
venues_map

# Search & Map all the Indian Restaurants in and around Melbourne suburbs

#### Let's create a function to repeat the same process as Melbourne, CBD to all the suburbs in Melbourne

In [162]:
def get_venues(lat,lng):
    
    #set variables
    radius=1000
    LIMIT=200
    search_query = 'Indian'
    CLIENT_ID = 'J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ' # your Foursquare ID
    CLIENT_SECRET = 'HTBEPGSNNYG2X0WW4ZUTDELC3DMLEVJBREI2WWTXQEL34GLK' # your Foursquare Secret
    VERSION = '20200401' # Foursquare API version

    #url to fetch data from foursquare api
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET,
            lat, 
            lng, 
            VERSION, 
            search_query, 
            radius, 
            LIMIT)
    
    # get all the data
    results = requests.get(url).json()
    #print('##################')
    #print(results)
    venue_data=results['response']['venues']
    venue_details=[]
    for row in venue_data:
        try:
            venue_id=row['id']
            venue_name=row['name']
            venue_lat=row['location']['lat']
            venue_lng=row['location']['lng']
            venue_category=row['categories'][0]['name']
            venue_details.append([venue_id,venue_name,venue_lat,venue_lng,venue_category])
        except KeyError:
            pass
        
    column_names=['ID','Name','Lat','Lng','Category']
    df = pd.DataFrame(venue_details,columns=column_names)
    return df

In [163]:
# prepare neighborhood list that contains indian resturants
column_names=['Suburb', 'ID','Name','Lat','Lng']
indian_rest=pd.DataFrame(columns=column_names)
count=1
for row in df_Mel_Subs.values.tolist():
    try:
        Suburb, Latitude, Longitude=row
        venues = get_venues(Latitude,Longitude)
        #print(venues)
    except:
        print("Oops!", sys.exc_info()[0], "occurred while trying ",Suburb)
        print("Trying Again!!")
        
    indian_resturants=venues[venues['Category']=='Indian Restaurant']
    print('(',count,'/',len(df_Mel_Subs),')','Indian Restaurant in '+Suburb+', '+'VIC'+':'+str(len(indian_resturants)))
    for resturant_detail in indian_resturants.values.tolist():
        id, name , lat, lng, category=resturant_detail
        indian_rest = indian_rest.append({'Suburb': Suburb,
                                                'ID': id,
                                                'Name' : name,
                                                'Lat' : lat,
                                                'Lng' : lng
                                               }, ignore_index=True)
    count+=1

( 1 / 379 ) Indian Restaurant in Abbotsford, VIC:1
( 2 / 379 ) Indian Restaurant in Aberfeldie, VIC:0
( 3 / 379 ) Indian Restaurant in Airport West, VIC:0
( 4 / 379 ) Indian Restaurant in Albanvale, VIC:0
( 5 / 379 ) Indian Restaurant in Albert Park, VIC:5
( 6 / 379 ) Indian Restaurant in Albion, VIC:1
( 7 / 379 ) Indian Restaurant in Keilor, VIC:0
( 8 / 379 ) Indian Restaurant in Alphington, VIC:1
( 9 / 379 ) Indian Restaurant in Altona Meadows, VIC:0
( 10 / 379 ) Indian Restaurant in Altona North, VIC:0
( 11 / 379 ) Indian Restaurant in Altona, VIC:0
( 12 / 379 ) Indian Restaurant in Ardeer, VIC:0
( 13 / 379 ) Indian Restaurant in Kensington, VIC:0
( 14 / 379 ) Indian Restaurant in Kew East, VIC:0
( 15 / 379 ) Indian Restaurant in Kew, VIC:1
( 16 / 379 ) Indian Restaurant in Keysborough, VIC:0
( 17 / 379 ) Indian Restaurant in Kilsyth South, VIC:0
( 18 / 379 ) Indian Restaurant in Armadale, VIC:0
( 19 / 379 ) Indian Restaurant in Ascot Vale, VIC:1
( 20 / 379 ) Indian Restaurant in As

In [164]:
indian_rest

Unnamed: 0,Suburb,ID,Name,Lat,Lng
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,-37.810279,144.998983
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,-37.833277,144.960784
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,-37.839276,144.967427
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,-37.831126,144.954201
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,-37.831110,144.953620
...,...,...,...,...,...
235,Windsor,5811d139d67ce0d723652355,spicy fusion Indian bar and restaurant,-37.846501,144.993526
236,Windsor,5299966911d24998f6446a66,INDIAN TAKE AWAY OR EAT IN,-37.839181,144.992992
237,Windsor,4b8cb19af964a520afda32e3,Flag Of India Indian Restaurant,-37.847543,144.999303
238,Windsor,4b9994e6f964a5204b8535e3,Yarra Indian Take Away,-37.839297,144.993386


### Business Problem

Fetch the list of unique list of suburbs from the above dataframe to get the list of suburbs with minimum one existing Indian Restaurant. We can minus this list from the list of suburbs in Melbourne and get the list of suburbs which lack Indian Restaurants

In [165]:
df_A = indian_rest[['Suburb']].drop_duplicates(['Suburb'],keep='first', ignore_index=True)
df_A.shape

(122, 1)

In [166]:
df_B = df_Mel_Subs[['suburb']].drop_duplicates(['suburb'],keep='first', ignore_index=True)
df_B.shape

(371, 1)

We can remove suburbs with Indian Restaurants(df_A) from list of suburbs in Melbourne(df_B) and get the list of suburbs which do not have an Indian Restaurant

In [167]:
# df1[df1.ID.isin(df2.ID) == False]
df_C = df_B[df_B['suburb'].isin(df_A['Suburb']) == False].reset_index(drop=True)
df_C.shape

(249, 1)

#### i.e there are 312 suburbs without an Indian Restaurants. We can get the list from below....

In [168]:
df_C['suburb'].tolist()

['Aberfeldie',
 'Airport West',
 'Albanvale',
 'Keilor',
 'Altona Meadows',
 'Altona North',
 'Altona',
 'Ardeer',
 'Kensington',
 'Kew East',
 'Keysborough',
 'Kilsyth South',
 'Armadale',
 'Aspendale Gardens',
 'Aspendale',
 'Attwood',
 'Kilsyth',
 'Kings Park',
 'Auburn',
 'Avondale Heights',
 'Balaclava',
 'Kingsville',
 'Knoxfield',
 'Kooyong',
 'Lalor',
 'Langwarrin',
 'Balwyn North',
 'Bayswater North',
 'Bayswater',
 'Laverton North',
 'Laverton',
 'Lilydale',
 'Lower Plenty',
 'Lynbrook',
 'Lyndhurst',
 'Lysterfield',
 'Belgrave Heights',
 'Belgrave South',
 'Belgrave',
 'Macleod',
 'Maidstone',
 'Malvern',
 'Berwick',
 'Bittern',
 'Meadow Heights',
 'Melbourne Airport',
 'Melton South',
 'Blairgowrie',
 'Bonbeach',
 'Mernda',
 'Mill Park',
 'Monbulk',
 'Boronia',
 'Box Hill North',
 'Mont Albert North',
 'Mont Albert',
 'Montmorency',
 'Montrose',
 'Box Hill South',
 'Box Hill',
 'Braeside',
 'Braybrook',
 'Briar Hill',
 'Broadmeadows',
 'Brookfield',
 'Brooklyn',
 'Brunswick

## Business Problem # 3
### Q. Which areas lack Indian Restaurants?
#### Hence we got the solution to business problem # 3 above

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

#### Lets checkout the number of Indian Restaurants we were able to fetch using the FourSquares. There will be many duplicates among them, as we used 1000m distance. So lets check for duplicates and keep the first occuring restaurant in the list and delete rest.

In [169]:
indian_rest.shape

(240, 5)

In [170]:
duplicateRowsDF = indian_rest[indian_rest.duplicated(['ID','Name','Lat','Lng'])]
duplicateRowsDF.shape

(137, 5)

In [171]:
indian_rest.drop_duplicates(['ID','Name','Lat','Lng'],keep='first', inplace=True, ignore_index=True)

In [172]:
indian_rest.shape

(103, 5)

#### Fetaching the geograpgical localtion of Melbourne to map

In [173]:
address = 'Melbourne, VIC'
#-37.5939889 145.0337333
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

-37.8142176 144.9631608


#### Now we can plot all the Indian Restaurants in Melbourne Suburbs on Maps

In [174]:
indian_restaurant_map = folium.Map(location=[latitude, longitude], zoom_start=10) # generate map centred around the Conrad Hotel

# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Melbourne',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(indian_restaurant_map)

<folium.features.CircleMarker at 0x7f97faf05860>

In [175]:
# add the Indian restaurants as blue circle markers
for lat, lng, label, suburb in zip(indian_rest.Lat, indian_rest.Lng, indian_rest.Name, indian_rest.Suburb):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        #popup=label,
        popup=folium.Popup(label+','+suburb, parse_html=True, max_width=100),
        fill = True,
        fill_color='blue',
        fill_opacity=0.6,
        parse_html=True
    ).add_to(indian_restaurant_map)

<folium.features.CircleMarker at 0x7f97fadf0be0>

<folium.features.CircleMarker at 0x7f97faddf9b0>

<folium.features.CircleMarker at 0x7f97fadf0080>

<folium.features.CircleMarker at 0x7f97fadf0c50>

<folium.features.CircleMarker at 0x7f97fad9a320>

<folium.features.CircleMarker at 0x7f98020e9ac8>

<folium.features.CircleMarker at 0x7f97fada0f28>

<folium.features.CircleMarker at 0x7f97faddf278>

<folium.features.CircleMarker at 0x7f97fae00dd8>

<folium.features.CircleMarker at 0x7f97fadf0d30>

<folium.features.CircleMarker at 0x7f97faddf1d0>

<folium.features.CircleMarker at 0x7f97fae005f8>

<folium.features.CircleMarker at 0x7f97fad9aeb8>

<folium.features.CircleMarker at 0x7f97fada04a8>

<folium.features.CircleMarker at 0x7f97fad9aba8>

<folium.features.CircleMarker at 0x7f97fad9a240>

<folium.features.CircleMarker at 0x7f97fad9ab70>

<folium.features.CircleMarker at 0x7f97fae00358>

<folium.features.CircleMarker at 0x7f97fada0dd8>

<folium.features.CircleMarker at 0x7f97fadcdba8>

<folium.features.CircleMarker at 0x7f97fadcd8d0>

<folium.features.CircleMarker at 0x7f97fadcdf28>

<folium.features.CircleMarker at 0x7f97fada03c8>

<folium.features.CircleMarker at 0x7f97fadcd5c0>

<folium.features.CircleMarker at 0x7f97fadca780>

<folium.features.CircleMarker at 0x7f97fadcdcc0>

<folium.features.CircleMarker at 0x7f97fadca400>

<folium.features.CircleMarker at 0x7f97fad71048>

<folium.features.CircleMarker at 0x7f97fad718d0>

<folium.features.CircleMarker at 0x7f97fadcd710>

<folium.features.CircleMarker at 0x7f97fad713c8>

<folium.features.CircleMarker at 0x7f97fadca278>

<folium.features.CircleMarker at 0x7f97fad71080>

<folium.features.CircleMarker at 0x7f97fad710b8>

<folium.features.CircleMarker at 0x7f97fad78748>

<folium.features.CircleMarker at 0x7f97fadca710>

<folium.features.CircleMarker at 0x7f97fad71908>

<folium.features.CircleMarker at 0x7f97fad78c50>

<folium.features.CircleMarker at 0x7f97fad781d0>

<folium.features.CircleMarker at 0x7f97fad782e8>

<folium.features.CircleMarker at 0x7f97fad5ff98>

<folium.features.CircleMarker at 0x7f97fad784e0>

<folium.features.CircleMarker at 0x7f97fad71198>

<folium.features.CircleMarker at 0x7f97fad5feb8>

<folium.features.CircleMarker at 0x7f97fad5fc50>

<folium.features.CircleMarker at 0x7f97fad5f160>

<folium.features.CircleMarker at 0x7f97fad3bf60>

<folium.features.CircleMarker at 0x7f97fad78da0>

<folium.features.CircleMarker at 0x7f97fad5fcc0>

<folium.features.CircleMarker at 0x7f97fad3b240>

<folium.features.CircleMarker at 0x7f97fad3b518>

<folium.features.CircleMarker at 0x7f97fad3b9e8>

<folium.features.CircleMarker at 0x7f97fad3b908>

<folium.features.CircleMarker at 0x7f97fad3bda0>

<folium.features.CircleMarker at 0x7f97fad3ba58>

<folium.features.CircleMarker at 0x7f97fad4de10>

<folium.features.CircleMarker at 0x7f97fad4da20>

<folium.features.CircleMarker at 0x7f97fad4d2b0>

<folium.features.CircleMarker at 0x7f97fad4d390>

<folium.features.CircleMarker at 0x7f97fad31550>

<folium.features.CircleMarker at 0x7f97facdca58>

<folium.features.CircleMarker at 0x7f97fad31908>

<folium.features.CircleMarker at 0x7f97fad4d320>

<folium.features.CircleMarker at 0x7f97fad3bd30>

<folium.features.CircleMarker at 0x7f97fad31e80>

<folium.features.CircleMarker at 0x7f97fad319e8>

<folium.features.CircleMarker at 0x7f97facdc630>

<folium.features.CircleMarker at 0x7f97facdcda0>

<folium.features.CircleMarker at 0x7f97fad31898>

<folium.features.CircleMarker at 0x7f97facdc748>

<folium.features.CircleMarker at 0x7f97facdc7f0>

<folium.features.CircleMarker at 0x7f97facdcf98>

<folium.features.CircleMarker at 0x7f97facdf390>

<folium.features.CircleMarker at 0x7f97facdfef0>

<folium.features.CircleMarker at 0x7f97facdf4a8>

<folium.features.CircleMarker at 0x7f97facdf438>

<folium.features.CircleMarker at 0x7f97fad31208>

<folium.features.CircleMarker at 0x7f97facdc198>

<folium.features.CircleMarker at 0x7f97facdff28>

<folium.features.CircleMarker at 0x7f97fad04630>

<folium.features.CircleMarker at 0x7f97fad04dd8>

<folium.features.CircleMarker at 0x7f97fad04048>

<folium.features.CircleMarker at 0x7f97fad0ac88>

<folium.features.CircleMarker at 0x7f97fad0a9e8>

<folium.features.CircleMarker at 0x7f97facdff60>

<folium.features.CircleMarker at 0x7f97facdfe80>

<folium.features.CircleMarker at 0x7f97fad0a860>

<folium.features.CircleMarker at 0x7f97fad0a588>

<folium.features.CircleMarker at 0x7f97facae6a0>

<folium.features.CircleMarker at 0x7f97facaef28>

<folium.features.CircleMarker at 0x7f97facaeb38>

<folium.features.CircleMarker at 0x7f97facb9e80>

<folium.features.CircleMarker at 0x7f97fad04fd0>

<folium.features.CircleMarker at 0x7f97fad0af98>

<folium.features.CircleMarker at 0x7f97facb9630>

<folium.features.CircleMarker at 0x7f97facb98d0>

<folium.features.CircleMarker at 0x7f97facb95f8>

<folium.features.CircleMarker at 0x7f97facae550>

<folium.features.CircleMarker at 0x7f97facaebe0>

<folium.features.CircleMarker at 0x7f97facca7f0>

<folium.features.CircleMarker at 0x7f97fad0a550>

<folium.features.CircleMarker at 0x7f97facae518>

<folium.features.CircleMarker at 0x7f97facb93c8>

In [176]:
indian_restaurant_map

# Getting Ratings / Likes for each restaurant

#### Single Restaurant

In [177]:
id = '5a593d245c683829c6e59cbd'

url = 'https://api.foursquare.com/v2/venues/{}/likes?client_id={}&client_secret={}&v={}'.format(
        id,
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION)

print(url) 

results = requests.get(url).json() 
likes_count = results['response']['likes']['count']
likes_count

https://api.foursquare.com/v2/venues/5a593d245c683829c6e59cbd/likes?client_id=J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ&client_secret=HTBEPGSNNYG2X0WW4ZUTDELC3DMLEVJBREI2WWTXQEL34GLK&v=20180604


0

#### Fetch the likes for all the indian restaurants in Melbourne

In [178]:
indian_rest.head()

Unnamed: 0,Suburb,ID,Name,Lat,Lng
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,-37.810279,144.998983
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,-37.833277,144.960784
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,-37.839276,144.967427
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,-37.831126,144.954201
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,-37.83111,144.95362


In [179]:
#function to fetch
def get_venue_like(id):
    
    #set variables
    CLIENT_ID = 'J4UCJKKWCHXHRU1NZ13LCUZRTECADD4IY1OT0TKTLR5O2KNJ' # your Foursquare ID
    CLIENT_SECRET = 'HTBEPGSNNYG2X0WW4ZUTDELC3DMLEVJBREI2WWTXQEL34GLK' # your Foursquare Secret
    VERSION = '20200401' # Foursquare API version

    #url to fetch data from foursquare api
    url = 'https://api.foursquare.com/v2/venues/{}/likes?client_id={}&client_secret={}&v={}'.format(id,
                                                                                                    CLIENT_ID,
                                                                                                    CLIENT_SECRET,
                                                                                                    VERSION)
    
    # get all the data
    results = requests.get(url).json()
    #print('##################')
    #print(results)
    likes_count = results['response']['likes']['count']
    
    return likes_count

In [180]:
# prepare neighborhood list that contains indian resturants and likes
column_names=['Suburb', 'ID','Name','Likes','Lat','Lng']
indian_rest_mel=pd.DataFrame(columns=column_names)
for row in indian_rest.values.tolist():
    try:
        Suburb, ID, Name, Latitude, Longitude=row
        likes = get_venue_like(ID)
    except:
        print("Oops!", sys.exc_info()[0], "occurred while trying for ",Name, Suburb)
        print("Trying Again!!")
    indian_rest_mel = indian_rest_mel.append({'Suburb': Suburb,
                                              'ID': ID,
                                              'Name' : Name,
                                              'Likes' : likes,
                                              'Lat' : Latitude,
                                              'Lng' : Longitude
                                              }, ignore_index=True)

In [181]:
indian_rest_mel

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,0,-37.810279,144.998983
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,0,-37.833277,144.960784
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,0,-37.839276,144.967427
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,0,-37.831126,144.954201
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,1,-37.831110,144.953620
...,...,...,...,...,...,...
98,Upwey,4e0ee71f18a8bf9784d1c2f1,Jai Ho Indian Takeaway,0,-37.902920,145.329860
99,Vermont South,4b73bc89f964a5201eba2de3,Handis Indian Restaurant,2,-37.836632,145.195989
100,Wantirna South,4bf5f3b2d4cdb713ea3f84fe,Khazana Indian Restaurant,2,-37.869397,145.244062
101,Williamstown North,51d7dc23498e1aa1c2fe510b,Raga Indian Cuisine,1,-37.861763,144.902595


# Clustering the Indian Restaurants based on the user likes and rendering them on Map based on the cluster

#### Let's check how many venues were returned for each suburb

In [182]:
df_bp2 = indian_rest_mel
df_bp2

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,0,-37.810279,144.998983
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,0,-37.833277,144.960784
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,0,-37.839276,144.967427
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,0,-37.831126,144.954201
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,1,-37.831110,144.953620
...,...,...,...,...,...,...
98,Upwey,4e0ee71f18a8bf9784d1c2f1,Jai Ho Indian Takeaway,0,-37.902920,145.329860
99,Vermont South,4b73bc89f964a5201eba2de3,Handis Indian Restaurant,2,-37.836632,145.195989
100,Wantirna South,4bf5f3b2d4cdb713ea3f84fe,Khazana Indian Restaurant,2,-37.869397,145.244062
101,Williamstown North,51d7dc23498e1aa1c2fe510b,Raga Indian Cuisine,1,-37.861763,144.902595


Perform groupby operation on the "Suburb" field and order by the number of Indian Restaurants. Then get the top 5 suburbs with most Indian Restaurants

In [183]:
df_bp2.groupby('Suburb')['ID'].count().sort_values(ascending=False).head(5)

Suburb
Docklands         10
Prahran            9
South Wharf        5
Albert Park        5
East Melbourne     5
Name: ID, dtype: int64

These are the suburbs with most number of Indian Restaurants. Therefore we can conclude that these suburbs have the most number of customers for Indian Food. Hence these are the best suburbs to start a new Indian Restaurant

## Business Problem # 2
### Q. Find the top 3 suburbs for starting an Indian Restaurant?
#### Hence we got the solution to business problem # 2 above

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

From the above information, we can even deduce that with 10 Indian Restaurants in the reach, Docklands is the suburb to stay in, if one want to be in reach of multiple Indian Restaurants.

## Business Problem # 4
### Q. What is the best place to stay in Melbourne, if you want to stay close to Indian Food?Find the top 3 suburbs for starting an Indian Restaurant?
#### Hence we got the solution to business problem # 4 above

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

From the above information, we can get that with almost 10 restaurants in the reach respectively Docklands and Prahran are the go to areas if one wishes to have good Indian Food.

## Business Problem # 5
### Q. What is the best location to go if you wish to eat good Indian Food?
#### Hence we got the solution to business problem # 5 above

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

## Pre-Processing

As you can see, all other columns other than "Likes" in this dataset is a categorical variable. k-means algorithm isn't directly applicable to categorical variables because Euclidean distance function isn't really meaningful for discrete variables. So, lets drop these columns and run clustering.

In [184]:
df = indian_rest_mel[['Likes']]
df.head()

Unnamed: 0,Likes
0,0
1,0
2,0
3,0
4,1


## Normalizing over the standard deviation

Now let's normalize the dataset. But why do we need normalization in the first place? Normalization is a statistical method that helps mathematical-based algorithms to interpret features with different magnitudes and distributions equally. We use StandardScaler() to normalize our dataset.

In [185]:
from sklearn.preprocessing import StandardScaler
X = df.values
X = np.nan_to_num(X)
Clus_dataSet = StandardScaler().fit_transform(X)
Clus_dataSet



array([[-0.30575417],
       [-0.30575417],
       [-0.30575417],
       [-0.30575417],
       [-0.14342077],
       [ 0.18124603],
       [-0.30575417],
       [ 0.50591284],
       [-0.14342077],
       [ 0.01891263],
       [-0.30575417],
       [-0.30575417],
       [ 0.01891263],
       [-0.30575417],
       [ 0.01891263],
       [-0.30575417],
       [ 0.01891263],
       [-0.30575417],
       [-0.30575417],
       [ 0.01891263],
       [-0.30575417],
       [-0.14342077],
       [ 0.01891263],
       [-0.14342077],
       [-0.14342077],
       [-0.30575417],
       [-0.14342077],
       [-0.30575417],
       [-0.30575417],
       [ 0.01891263],
       [-0.30575417],
       [-0.30575417],
       [-0.14342077],
       [-0.14342077],
       [-0.30575417],
       [-0.30575417],
       [-0.30575417],
       [ 0.99291304],
       [-0.14342077],
       [-0.14342077],
       [-0.30575417],
       [ 0.01891263],
       [-0.30575417],
       [-0.30575417],
       [ 0.50591284],
       [-0

<h2 id="modeling">Modeling</h2>

Using the K-means clustering we can do modeling much easier, we need not guess it manually which cluster each restaurant falls into.

Lets apply k-means on our dataset, and take look at cluster labels.

In [186]:
clusterNum = 3
k_means = KMeans(init = "k-means++", n_clusters = clusterNum, n_init = 12)
k_means.fit(X)
labels = k_means.labels_
print(labels)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=3, n_init=12, n_jobs=None, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

[0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 2 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0
 2 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 1 0 0 0 0 0 0 0 0 0 0 0]


<h2 id="insights">Insights</h2>
We assign the labels to each row in dataframe.

In [187]:
indian_rest_mel["Clus_km"] = labels
indian_rest_mel.head(5)

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng,Clus_km
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,0,-37.810279,144.998983,0
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,0,-37.833277,144.960784,0
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,0,-37.839276,144.967427,0
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,0,-37.831126,144.954201,0
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,1,-37.83111,144.95362,0


#### Fetching Melbourne geo location for mapping

In [188]:
address = 'Melbourne, VIC'
#-37.5939889 145.0337333
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

-37.8142176 144.9631608


In [189]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(clusterNum)
ys = [i + x + (i*x)**2 for i in range(clusterNum)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, suburb, name, cluster in zip(indian_rest_mel['Lat'], indian_rest_mel['Lng'], indian_rest_mel['Suburb'], 
                                           indian_rest_mel['Name'], indian_rest_mel['Clus_km']):
    label = folium.Popup(str(name) + str(suburb) +' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<folium.features.CircleMarker at 0x7f97faba2e10>

<folium.features.CircleMarker at 0x7f97fabbe3c8>

<folium.features.CircleMarker at 0x7f97fabbe828>

<folium.features.CircleMarker at 0x7f97fabbe358>

<folium.features.CircleMarker at 0x7f97faba2e80>

<folium.features.CircleMarker at 0x7f97fabbe400>

<folium.features.CircleMarker at 0x7f97fab59be0>

<folium.features.CircleMarker at 0x7f97fab59438>

<folium.features.CircleMarker at 0x7f97fab59f28>

<folium.features.CircleMarker at 0x7f97fab598d0>

<folium.features.CircleMarker at 0x7f97fab59a58>

<folium.features.CircleMarker at 0x7f97fab59ef0>

<folium.features.CircleMarker at 0x7f97fab5eb00>

<folium.features.CircleMarker at 0x7f97fab5e3c8>

<folium.features.CircleMarker at 0x7f97fab5ecc0>

<folium.features.CircleMarker at 0x7f97fab5ef60>

<folium.features.CircleMarker at 0x7f97fab5ea58>

<folium.features.CircleMarker at 0x7f97fab59908>

<folium.features.CircleMarker at 0x7f97faba2240>

<folium.features.CircleMarker at 0x7f97fabbee80>

<folium.features.CircleMarker at 0x7f97fabbe9b0>

<folium.features.CircleMarker at 0x7f97fabbea20>

<folium.features.CircleMarker at 0x7f97fab649e8>

<folium.features.CircleMarker at 0x7f97fabbe9e8>

<folium.features.CircleMarker at 0x7f97faba29e8>

<folium.features.CircleMarker at 0x7f97fab64cc0>

<folium.features.CircleMarker at 0x7f97fab64da0>

<folium.features.CircleMarker at 0x7f97fab644e0>

<folium.features.CircleMarker at 0x7f97fab64518>

<folium.features.CircleMarker at 0x7f97fab64fd0>

<folium.features.CircleMarker at 0x7f97fab8dd30>

<folium.features.CircleMarker at 0x7f97fab5e048>

<folium.features.CircleMarker at 0x7f97fab64e10>

<folium.features.CircleMarker at 0x7f97fab8df60>

<folium.features.CircleMarker at 0x7f97fab8dac8>

<folium.features.CircleMarker at 0x7f97fab8d940>

<folium.features.CircleMarker at 0x7f97fab8d390>

<folium.features.CircleMarker at 0x7f97fab62be0>

<folium.features.CircleMarker at 0x7f97fab64c18>

<folium.features.CircleMarker at 0x7f97fab8d898>

<folium.features.CircleMarker at 0x7f97fab620b8>

<folium.features.CircleMarker at 0x7f97fab62978>

<folium.features.CircleMarker at 0x7f97fab62940>

<folium.features.CircleMarker at 0x7f97fab622e8>

<folium.features.CircleMarker at 0x7f97fab3cb70>

<folium.features.CircleMarker at 0x7f97fab64c50>

<folium.features.CircleMarker at 0x7f97fab62278>

<folium.features.CircleMarker at 0x7f97fab3c358>

<folium.features.CircleMarker at 0x7f97fab3c6a0>

<folium.features.CircleMarker at 0x7f97fab3c240>

<folium.features.CircleMarker at 0x7f97fab3c7f0>

<folium.features.CircleMarker at 0x7f97fab1db70>

<folium.features.CircleMarker at 0x7f97fab62e10>

<folium.features.CircleMarker at 0x7f97fab62710>

<folium.features.CircleMarker at 0x7f97fab1d7b8>

<folium.features.CircleMarker at 0x7f97fab1d208>

<folium.features.CircleMarker at 0x7f97fab1ddd8>

<folium.features.CircleMarker at 0x7f97fab1df98>

<folium.features.CircleMarker at 0x7f97fab55b00>

<folium.features.CircleMarker at 0x7f97fab3cd30>

<folium.features.CircleMarker at 0x7f97fab1d320>

<folium.features.CircleMarker at 0x7f97fab55780>

<folium.features.CircleMarker at 0x7f97fab55278>

<folium.features.CircleMarker at 0x7f97fab55c50>

<folium.features.CircleMarker at 0x7f97fab559e8>

<folium.features.CircleMarker at 0x7f97fab2fa90>

<folium.features.CircleMarker at 0x7f97fab1d898>

<folium.features.CircleMarker at 0x7f97fab550b8>

<folium.features.CircleMarker at 0x7f97fab2f550>

<folium.features.CircleMarker at 0x7f97fab2ff98>

<folium.features.CircleMarker at 0x7f97fab2f9e8>

<folium.features.CircleMarker at 0x7f97fab2fe80>

<folium.features.CircleMarker at 0x7f97faade240>

<folium.features.CircleMarker at 0x7f97fab554e0>

<folium.features.CircleMarker at 0x7f97fab2f940>

<folium.features.CircleMarker at 0x7f97faadeb70>

<folium.features.CircleMarker at 0x7f97faadec50>

<folium.features.CircleMarker at 0x7f97faade588>

<folium.features.CircleMarker at 0x7f97faade5c0>

<folium.features.CircleMarker at 0x7f97faade748>

<folium.features.CircleMarker at 0x7f97fab2f2b0>

<folium.features.CircleMarker at 0x7f97faade978>

<folium.features.CircleMarker at 0x7f97fab092e8>

<folium.features.CircleMarker at 0x7f97fab09e80>

<folium.features.CircleMarker at 0x7f97fab09ac8>

<folium.features.CircleMarker at 0x7f97fab09080>

<folium.features.CircleMarker at 0x7f97fab09cc0>

<folium.features.CircleMarker at 0x7f97faade710>

<folium.features.CircleMarker at 0x7f97fab09dd8>

<folium.features.CircleMarker at 0x7f97faadf278>

<folium.features.CircleMarker at 0x7f97faadfdd8>

<folium.features.CircleMarker at 0x7f97faadf6d8>

<folium.features.CircleMarker at 0x7f97fab07438>

<folium.features.CircleMarker at 0x7f97faadfba8>

<folium.features.CircleMarker at 0x7f97fab09240>

<folium.features.CircleMarker at 0x7f97faadff28>

<folium.features.CircleMarker at 0x7f97faadfa20>

<folium.features.CircleMarker at 0x7f97fab07be0>

<folium.features.CircleMarker at 0x7f97fab070b8>

<folium.features.CircleMarker at 0x7f97fab07860>

<folium.features.CircleMarker at 0x7f97fab07710>

<folium.features.CircleMarker at 0x7f97faadf7f0>

<folium.features.CircleMarker at 0x7f97faaac9e8>

## Business Problem # 1
### Q. List and visualize all the major parts of Melbourne City which have popular Indian Restaurants
#### The above rendered map is the solution to business problem # 1

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

# Examine Clusters

Indian Restaurants are clustered based on the user rating into 3 different clusters.

Cluster 1 --> Low or No Rating

Cluster 2 --> With Highest Rating

Cluster 3 --> Medium Rating

#### Custer 1

In [190]:
indian_rest_mel.loc[indian_rest_mel['Clus_km'] == 0, indian_rest_mel.columns[[0] + list(range(1, indian_rest_mel.shape[1]))]]

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng,Clus_km
0,Abbotsford,5c3c5818bd4009002c5cf256,Dana Pani Indian Restaurant,0,-37.810279,144.998983,0
1,Albert Park,4c6369a969a1c9b6bd4b3ba4,indian murchi,0,-37.833277,144.960784,0
2,Albert Park,5e4bc8fe3a33460008031e81,AMAN INDIAN RESTAURANT,0,-37.839276,144.967427,0
3,Albert Park,565580a7498eec9f81a1a2a1,My Masala Indian Cuisine,0,-37.831126,144.954201,0
4,Albert Park,5655800b498eacd755c2045a,My Masala Indian Cuisine,1,-37.831110,144.953620,0
...,...,...,...,...,...,...,...
98,Upwey,4e0ee71f18a8bf9784d1c2f1,Jai Ho Indian Takeaway,0,-37.902920,145.329860,0
99,Vermont South,4b73bc89f964a5201eba2de3,Handis Indian Restaurant,2,-37.836632,145.195989,0
100,Wantirna South,4bf5f3b2d4cdb713ea3f84fe,Khazana Indian Restaurant,2,-37.869397,145.244062,0
101,Williamstown North,51d7dc23498e1aa1c2fe510b,Raga Indian Cuisine,1,-37.861763,144.902595,0


#### Custer 2

In [191]:
indian_rest_mel.loc[indian_rest_mel['Clus_km'] == 1, indian_rest_mel.columns[[0] + list(range(1, indian_rest_mel.shape[1]))]]

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng,Clus_km
91,South Wharf,4b1cbc8ff964a520570924e3,Red Pepper,58,-37.81146,144.972623,1


#### Custer 3

In [192]:
indian_rest_mel.loc[indian_rest_mel['Clus_km'] == 2, indian_rest_mel.columns[[0] + list(range(1, indian_rest_mel.shape[1]))]]

Unnamed: 0,Suburb,ID,Name,Likes,Lat,Lng,Clus_km
7,Alphington,4b46fcacf964a520342a26e3,Everest Indian Restaurant,5,-37.778806,145.018208,2
37,Carlton North,4b9961dcf964a520437735e3,Singhs Gourmet Indian Foods,8,-37.77841,144.978337,2
44,Clifton Hill,4bb708a146d4a593b60cc7c0,Marigold,5,-37.78855,144.991215,2
47,Port Melbourne,4b05874df964a5205e8a22e3,Gaylord Indian Restaurant,5,-37.820263,144.954499,2
70,Docklands,4b768359f964a520b84f2ee3,Shiraaz Fine Indian Cuisine,6,-37.818692,144.95994,2
71,Docklands,4c7a274c97028cfa1a71ddfe,Le Taj Fine Indian Food,7,-37.807956,144.952096,2
74,Docklands,4b05874ef964a520b28a22e3,Nirankar Indian Restaurant,9,-37.814383,144.960548,2
83,East Melbourne,4fa10efbe4b07aaa44e6c896,Punjabi Tandoori Corner,6,-37.825,144.99329,2
89,South Wharf,4b307875f964a520c0f924e3,Flora Indian Restaurant,21,-37.817592,144.966626,2


From the above clustering, we can deduce that "Red Pepper" Restaurant has most number of likes on FourSquares and it must serve pretty good Indian Food and has large satisfied customers.

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX