In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.12

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    scikit-learn-0.20.1        |   py36h22eb022_0         5.7 MB
    liblapack-3.8.0            |      11_openblas          10 KB  conda-forge
    scipy-1.3.2                |   py36h921218d_0        18.0 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    libopenblas-0.3.6          |       h5a2b251_2         7.7 MB
    liblapacke-3.8.0           |      11_openblas          10 KB  conda-forge
    numpy-1.17.3               |   py36h95a1406_0         5.2 MB  conda-forge
    libcblas-3.8.0             |      11_openblas       

In [3]:
!pip install beautifulsoup4
from bs4 import BeautifulSoup
url = "https://en.wikipedia.org/wiki/Foshan"

text_result = requests.get(url).text #get the entire html of the article as a str
html_parsed_result = BeautifulSoup(text_result, 'html.parser') #transform the text to html

neightborhood_info_table = html_parsed_result.find('table', class_ = 'wikitable sortable collapsible')
neightborhood_rows = neightborhood_info_table.find_all('tr')

# extract the info ('Postcode', 'Borough', 'Neighbourhood') from the table
neightborhood_info = []
for row in neightborhood_rows:
    info = row.text.split('\n')[1:-5] # remove empty str (first and last items)
    neightborhood_info.append(info)
    
neightborhood_info[3:10]



[['440600', 'Foshan', '3848.49', '7197394', 'Chancheng', '528000'],
 ['440604',
  'Chancheng',
  '154.15',
  '1,101,077',
  'Zumiao Subdistrict',
  '528000'],
 ['440605',
  'Nanhai',
  '1073.94',
  '2,588,844',
  'Guicheng Subdistrict',
  '528200'],
 ['440606', 'Shunde', '806.55', '2,464,784', 'Daliang Subdistrict', '528300'],
 ['440607', 'Sanshui', '874.22', '622,645', 'Xinan Subdistrict', '528100'],
 ['440608', 'Gaoming', '939.64', '420,044', 'Hecheng Subdistrict', '528500']]

In [4]:
neighborhood_df = pd.DataFrame(neightborhood_info[3:10], columns=['Division Code','Division','Area(km^2)','Population','Seat','Postal Code'])

neighborhood_df.head(10)

Unnamed: 0,Division Code,Division,Area(km^2),Population,Seat,Postal Code
0,440600,Foshan,3848.49,7197394,Chancheng,528000
1,440604,Chancheng,154.15,1101077,Zumiao Subdistrict,528000
2,440605,Nanhai,1073.94,2588844,Guicheng Subdistrict,528200
3,440606,Shunde,806.55,2464784,Daliang Subdistrict,528300
4,440607,Sanshui,874.22,622645,Xinan Subdistrict,528100
5,440608,Gaoming,939.64,420044,Hecheng Subdistrict,528500


In [5]:
Division = neighborhood_df['Division'].values
print(Division)

['Foshan' 'Chancheng' 'Nanhai' 'Shunde' 'Sanshui' 'Gaoming']


In [6]:
def get_coords_local(localidad, output_as='center'):
    # create url
    url = '{0}{1}{2}'.format('http://nominatim.openstreetmap.org/search.php?q=',
                             localidad+', Foshan',
                             '&format=json&polygon=0')
    response = requests.get(url).json()[0]

    # parse response to list
    if output_as == 'boundingbox':
        lst = response[output_as]
        output = [float(i) for i in lst]
    if output_as == 'center':
        lst = [response.get(key) for key in ['lat','lon']]
        output = [float(i) for i in lst]
    return output


In [7]:

df = neighborhood_df.copy()

latitudeCln = []
longitudeCln = []
for i in Division:
    print(i)
    lat, long = get_coords_local(localidad=i, output_as='center')
    latitudeCln.append(lat)
    longitudeCln.append(long)

df['Latitude'] = latitudeCln
df['Longitude'] = longitudeCln


Foshan
Chancheng
Nanhai
Shunde
Sanshui
Gaoming


In [8]:
df

Unnamed: 0,Division Code,Division,Area(km^2),Population,Seat,Postal Code,Latitude,Longitude
0,440600,Foshan,3848.49,7197394,Chancheng,528000,23.024769,113.114633
1,440604,Chancheng,154.15,1101077,Zumiao Subdistrict,528000,23.024769,113.114633
2,440605,Nanhai,1073.94,2588844,Guicheng Subdistrict,528200,23.060781,112.982365
3,440606,Shunde,806.55,2464784,Daliang Subdistrict,528300,22.844685,113.162086
4,440607,Sanshui,874.22,622645,Xinan Subdistrict,528100,23.268599,112.941953
5,440608,Gaoming,939.64,420044,Hecheng Subdistrict,528500,22.832154,112.703403


In [9]:
df2=df[['Division','Latitude','Longitude']]
df2

Unnamed: 0,Division,Latitude,Longitude
0,Foshan,23.024769,113.114633
1,Chancheng,23.024769,113.114633
2,Nanhai,23.060781,112.982365
3,Shunde,22.844685,113.162086
4,Sanshui,23.268599,112.941953
5,Gaoming,22.832154,112.703403


In [10]:
address = 'Foshan'

geolocator = Nominatim(user_agent="heheda")
location = geolocator.geocode(address, timeout=60, exactly_one=True)
latitude = location.latitude
longitude = location.longitude
print('The decimal coordinates of Foshan are {}, {}.'.format(latitude, longitude))

The decimal coordinates of Foshan are 23.0247687, 113.1146335.


In [11]:
# create map of Foshan using latitude and longitude values
map_foshan = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, local in zip(df2['Latitude'], df2['Longitude'], df2['Division']):
    label = '{}'.format(local)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_foshan)  
    
map_foshan

In [12]:
# function to repeat the exploring process to all the neighborhoods in Toronto
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    try:
        venues_list=[]
        for name, lat, lng in zip(names, latitudes, longitudes):
            #print(name)

            # create the API request URL
            url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)

            if (categoryIds != ''):
                url = url + '&categoryId={}'
                url = url.format(categoryIds)

            # make the GET request
            response = requests.get(url).json()
            results = response["response"]['venues']

            # return only relevant information for each nearby venue
            for v in results:
                success = False
                try:
                    category = v['categories'][0]['name']
                    success = True
                except:
                    pass

                if success:
                    venues_list.append([(
                        name, 
                        lat, 
                        lng, 
                        v['name'], 
                        v['location']['lat'], 
                        v['location']['lng'],
                        v['categories'][0]['name']
                    )])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Division', 
                  'Division Latitude', 
                  'Division Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    except:
        print(url)
        print(response)
        print(results)
        print(nearby_venues)

    return(nearby_venues)

In [13]:

CLIENT_ID = 'Z1XGYCLOLV5HISJLSGIWUCCADRBD21FQYU2EWGG0JIGGL2FJ' # your Foursquare ID
CLIENT_SECRET = '2BIM2KZHMWUVAXWE4IY5ISOTYYZIAUEBIDMDJPC3SHYHVNCW' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 500
RADIUS=5000

In [14]:
# Use category id 4bf58dd8d48988d16c941735 to only get the burger joints
foshan_venues_burger = getNearbyVenues(names=df2['Division'], latitudes=df2['Latitude'], longitudes=df2['Longitude'], radius=5000, categoryIds='4bf58dd8d48988d16c941735')
foshan_venues_burger.head(500)

Unnamed: 0,Division,Division Latitude,Division Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Foshan,23.024769,113.114633,McDonald's (麦当劳),23.027726,113.111342,Fast Food Restaurant
1,Foshan,23.024769,113.114633,McDonald's (麦当劳),23.036622,113.083777,Fast Food Restaurant
2,Foshan,23.024769,113.114633,McDonald's (麦当劳),23.014463,113.102361,Fast Food Restaurant
3,Foshan,23.024769,113.114633,McDonald's (麦当劳),23.011043,113.107831,Fast Food Restaurant
4,Foshan,23.024769,113.114633,McDonald's (麦当劳),23.01839,113.109742,Fast Food Restaurant
5,Foshan,23.024769,113.114633,McDonald's (麦当劳),23.032944,113.140793,Fast Food Restaurant
6,Foshan,23.024769,113.114633,McDonald's (麦当劳),23.031211,113.112069,Fast Food Restaurant
7,Foshan,23.024769,113.114633,Burger King (汉堡王),23.013239,113.116451,Fast Food Restaurant
8,Foshan,23.024769,113.114633,McDonald's (麦当劳),23.067561,113.124633,Fast Food Restaurant
9,Foshan,23.024769,113.114633,Burger King (汉堡王),23.036343,113.154076,Fast Food Restaurant


In [15]:
# function to add markers for given venues to map
def addToMap(df, color, existingMap):
    for lat, lng, local, venue, venueCat in zip(df['Venue Latitude'], df['Venue Longitude'], df['Division'], df['Venue'], df['Venue Category']):
        label = '{} ({}) - {}'.format(venue, venueCat, local)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7).add_to(existingMap)

In [16]:
map_foshan_burger = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(foshan_venues_burger, 'red', map_foshan_burger)
map_foshan_burger

In [17]:
foshan_venues_highschools = getNearbyVenues(names=df2['Division'], latitudes=df2['Latitude'], longitudes=df2['Longitude'], radius=5000, categoryIds='4bf58dd8d48988d13d941735')
foshan_venues_highschools.head()

Unnamed: 0,Division,Division Latitude,Division Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Foshan,23.024769,113.114633,南海实验中学 Nanhai Experimental Highschool,23.026488,113.164138,High School
1,Foshan,23.024769,113.114633,C教學樓 課室,22.980569,113.119985,High School
2,Chancheng,23.024769,113.114633,南海实验中学 Nanhai Experimental Highschool,23.026488,113.164138,High School
3,Chancheng,23.024769,113.114633,C教學樓 課室,22.980569,113.119985,High School


In [18]:
map_foshan_highschools = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(foshan_venues_highschools, 'green', map_foshan_highschools)
map_foshan_highschools

In [19]:
foshan_venues_uni = getNearbyVenues(names=df2['Division'], latitudes=df2['Latitude'], longitudes=df2['Longitude'], radius=5000, categoryIds='4bf58dd8d48988d1ae941735')
foshan_venues_uni.head()

Unnamed: 0,Division,Division Latitude,Division Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Foshan,23.024769,113.114633,佛山大学,23.026899,113.091588,University
1,Foshan,23.024769,113.114633,南海广播电视大学,23.027203,113.140678,University
2,Chancheng,23.024769,113.114633,佛山大学,23.026899,113.091588,University
3,Chancheng,23.024769,113.114633,南海广播电视大学,23.027203,113.140678,University
4,Sanshui,23.268599,112.941953,佛山职业技术学院,23.244025,112.975645,University


In [20]:
map_foshan_uni = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(foshan_venues_uni, 'gold', map_foshan_uni)
map_foshan_uni

In [21]:
foshan_venues_office = getNearbyVenues(names=df2['Division'], latitudes=df2['Latitude'], longitudes=df2['Longitude'], radius=5000, categoryIds='4d4b7105d754a06375d81259')
foshan_venues_office.head()

Unnamed: 0,Division,Division Latitude,Division Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Foshan,23.024769,113.114633,祖庙 Zumiao / Foshan Ancestral Temple,23.031696,113.107776,Buddhist Temple
1,Foshan,23.024769,113.114633,中國聯通南桂東直屬營業廳,23.035,113.137708,Building
2,Foshan,23.024769,113.114633,佛山市第三人民医院,22.982402,113.102,Hospital
3,Foshan,23.024769,113.114633,佛山市中级人民法院,22.992151,113.137789,Building
4,Foshan,23.024769,113.114633,禅城区中心医院 精进楼,23.0076,113.08,Medical Center


In [22]:
map_foshan_office = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(foshan_venues_office, 'fuchsia', map_foshan_office)
map_foshan_office

In [23]:

def addColumn(startDf, columnTitle, dataDf):
    grouped = dataDf.groupby('Division').count()
    
    for n in startDf['Division']:
        try:
            startDf.loc[startDf['Division'] == n,columnTitle] = grouped.loc[n, 'Venue']
        except:
            startDf.loc[startDf['Division'] == n,columnTitle] = 0

In [24]:
df_data = df2.copy()
#df_data.rename(columns={'Division':'Localidad'}, inplace=True)
addColumn(df_data, 'Burger', foshan_venues_burger)
addColumn(df_data, 'High Schools', foshan_venues_highschools)
addColumn(df_data, 'Universities', foshan_venues_uni)
addColumn(df_data, 'Offices', foshan_venues_office)
df_data

Unnamed: 0,Division,Latitude,Longitude,Burger,High Schools,Universities,Offices
0,Foshan,23.024769,113.114633,11.0,2.0,2.0,48.0
1,Chancheng,23.024769,113.114633,11.0,2.0,2.0,48.0
2,Nanhai,23.060781,112.982365,0.0,0.0,0.0,16.0
3,Shunde,22.844685,113.162086,1.0,0.0,0.0,27.0
4,Sanshui,23.268599,112.941953,0.0,0.0,1.0,7.0
5,Gaoming,22.832154,112.703403,0.0,0.0,0.0,2.0


In [25]:
# negative weight, because Jeronimo wants to open a burger joint and thus wants to avoid concurrence as much as possible
weight_burger = -1

# positive weight, because high school students are good customers
weight_schools = 1

# positive weight, because uni students are good customers
weight_uni = 1.5

# positive weight because employees are even better customers
weight_offices = 2

In [26]:
df_weighted = df_data[['Division']].copy()

In [27]:
df_weighted['Score'] = df_data['Burger'] * weight_burger + df_data['High Schools'] * weight_schools + df_data['Universities'] * weight_uni + df_data['Offices'] * weight_offices
df_weighted = df_weighted.sort_values(by=['Score'], ascending=False)
df_weighted

Unnamed: 0,Division,Score
0,Foshan,90.0
1,Chancheng,90.0
3,Shunde,53.0
2,Nanhai,32.0
4,Sanshui,15.5
5,Gaoming,4.0


In [29]:

map_fo_result = folium.Map(location=[latitude, longitude], zoom_start=15)

fo_win = df2[df2['Division'] == 'Foshan']

for lat, lng, local in zip(fo_win['Latitude'], fo_win['Longitude'], fo_win['Division']):
    label = '{}'.format(local)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7).add_to(map_fo_result) 

addToMap(foshan_venues_burger[foshan_venues_burger['Division'] == 'Foshan'], 'red', map_fo_result)
addToMap(foshan_venues_highschools[foshan_venues_highschools['Division'] == 'Foshan'], 'green', map_fo_result)
addToMap(foshan_venues_uni[foshan_venues_uni['Division'] == 'Foshan'], 'gold', map_fo_result)
addToMap(foshan_venues_office[foshan_venues_office['Division'] == 'Foshan'], 'fuchsia', map_fo_result)

map_fo_result