In [32]:
# Standard library
import json
import os
from urllib.request import urlopen

# Third-party: data handling and HTTP
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import requests # library to handle requests

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as Colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

import geocoder
from geopy.geocoders import Nominatim

from bs4 import BeautifulSoup

In [67]:
# Raw postcode-level table; the file is expected next to the notebook.
postcode_csv = 'London_postcodes.csv'
London = pd.read_csv(postcode_csv)
London.head()

Unnamed: 0,Postcode,In Use?,Latitude,Longitude,Easting,Northing,Grid Ref,County,District,Ward,...,Quality,User Type,Last updated,Nearest station,Distance to station,Postcode area,Postcode district,Police force,Water company,Plus Code
0,BR1 1AA,Yes,51.401546,0.015415,540291,168873,TQ402688,Greater London,Bromley,Bromley Town,...,1,0,19-02-2020,Bromley South,0.218257,BR,BR1,Metropolitan Police,Thames Water,9F32C228+J5
1,BR1 1AB,Yes,51.406333,0.015208,540262,169405,TQ402694,Greater London,Bromley,Bromley Town,...,1,0,19-02-2020,Bromley North,0.253666,BR,BR1,Metropolitan Police,Thames Water,9F32C248+G3
2,BR1 1AD,No,51.400057,0.016715,540386,168710,TQ403687,Greater London,Bromley,Bromley Town,...,1,1,19-02-2020,Bromley South,0.044559,BR,BR1,Metropolitan Police,,9F32C228+2M
3,BR1 1AE,Yes,51.404543,0.014195,540197,169204,TQ401692,Greater London,Bromley,Bromley Town,...,1,0,19-02-2020,Bromley North,0.462939,BR,BR1,Metropolitan Police,Thames Water,9F32C237+RM
4,BR1 1AF,Yes,51.401392,0.014948,540259,168855,TQ402688,Greater London,Bromley,Bromley Town,...,1,0,19-02-2020,Bromley South,0.227664,BR,BR1,Metropolitan Police,Thames Water,9F32C227+HX


In [68]:
# List the column labels of the raw postcode table to choose the fields kept below.
London.columns

Index(['Postcode', 'In Use?', 'Latitude', 'Longitude', 'Easting', 'Northing',
       'Grid Ref', 'County', 'District', 'Ward', 'District Code', 'Ward Code',
       'Country', 'County Code', 'Constituency', 'Introduced', 'Terminated',
       'Parish', 'National Park', 'Population', 'Households', 'Built up area',
       'Built up sub-division', 'Lower layer super output area', 'Rural/urban',
       'Region', 'Altitude', 'London zone', 'LSOA Code', 'Local authority',
       'MSOA Code', 'Middle layer super output area', 'Parish Code',
       'Census output area', 'Constituency Code',
       'Index of Multiple Deprivation', 'Quality', 'User Type', 'Last updated',
       'Nearest station', 'Distance to station', 'Postcode area',
       'Postcode district', 'Police force', 'Water company', 'Plus Code'],
      dtype='object')

In [69]:
# Keep only the coordinates and the two administrative labels.
location_columns = ['Latitude', 'Longitude', 'District', 'Ward']
london = London[location_columns]
london.head()

Unnamed: 0,Latitude,Longitude,District,Ward
0,51.401546,0.015415,Bromley,Bromley Town
1,51.406333,0.015208,Bromley,Bromley Town
2,51.400057,0.016715,Bromley,Bromley Town
3,51.404543,0.014195,Bromley,Bromley Town
4,51.401392,0.014948,Bromley,Bromley Town


In [70]:
# Collapse the postcode-level rows to one row per (District, Ward), using the
# mean latitude/longitude of the group as the ward's location.
# The frame is derived from the raw `London` table (not from `london` itself)
# and the averaged columns are named explicitly, so re-running this cell
# always gives the same result, even after a `Cluster` column has been added
# to `london` further down.
london = (
    London
    .groupby(by=['District', 'Ward'])[['Latitude', 'Longitude']]
    .mean()
    .reset_index()
)
london.head()

Unnamed: 0,District,Ward,Latitude,Longitude
0,Barking and Dagenham,Abbey,51.539458,0.078629
1,Barking and Dagenham,Alibon,51.545854,0.150324
2,Barking and Dagenham,Becontree,51.554243,0.118935
3,Barking and Dagenham,Chadwell Heath,51.580459,0.136093
4,Barking and Dagenham,Eastbrook,51.555709,0.169543


In [71]:
# Cluster the London wards by location and draw one coloured marker per ward.
N_LONDON_CLUSTERS = 16  # number of k-means groups the wards are split into

# Map centred on hard-coded coordinates.
london_map = folium.Map(location=[51.5074, -0.1278], zoom_start=12)

# Feature matrix for k-means: one (latitude, longitude) row per ward.
X = london['Latitude']
Y = london['Longitude']
Z = np.stack((X, Y), axis=1)

# n_init is pinned to 10 (the long-standing default) so the clustering does not
# change on scikit-learn versions where the default became 'auto'.
kmeans = KMeans(n_clusters=N_LONDON_CLUSTERS, n_init=10, random_state=0).fit(Z)

clusters = kmeans.labels_

# One evenly spaced viridis colour per cluster label, converted to hex strings.
colors_array = cm.viridis(np.linspace(0, 1, N_LONDON_CLUSTERS))
viridis = [Colors.rgb2hex(i) for i in colors_array]

london['Cluster'] = clusters

for latitude, longitude, district, cluster in zip(london['Latitude'], london['Longitude'], london['District'], london['Cluster']):
    label = folium.Popup(district, parse_html=True)
    # Labels run 0..N-1, so they index the palette directly; the previous
    # `cluster - 1` made label 0 wrap around to the last colour.
    marker_color = viridis[int(cluster)]
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color=marker_color,
        fill_color=marker_color,
        fill=True,
        fill_opacity=0.7).add_to(london_map)

In [72]:
# Render the folium map of London as the cell output.
london_map

In [73]:
# Preview the ward table, which now carries the `Cluster` label column.
london.head()

Unnamed: 0,District,Ward,Latitude,Longitude,Cluster
0,Barking and Dagenham,Abbey,51.539458,0.078629,3
1,Barking and Dagenham,Alibon,51.545854,0.150324,12
2,Barking and Dagenham,Becontree,51.554243,0.118935,12
3,Barking and Dagenham,Chadwell Heath,51.580459,0.136093,12
4,Barking and Dagenham,Eastbrook,51.555709,0.169543,12


In [35]:
# Download the New York neighbourhood dataset and parse it as JSON.
# The context manager closes the HTTP response as soon as parsing is done;
# previously the connection object was never closed.
with urlopen("https://cocl.us/new_york_dataset") as response:
    newyork_data = json.load(response)

In [45]:
neighborhoods_data = newyork_data['features']
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Collect one record per feature and build the DataFrame once at the end.
# `DataFrame.append` was removed in pandas 2.0, and calling it inside a loop
# copied the whole frame on every iteration.
neighborhood_rows = []
for data in neighborhoods_data:
    properties = data['properties']
    # Coordinates are read as [longitude, latitude]: index 0 is the longitude
    # and index 1 the latitude.
    neighborhood_latlon = data['geometry']['coordinates']

    neighborhood_rows.append({'Borough': properties['borough'],
                              'Neighborhood': properties['name'],
                              'Latitude': neighborhood_latlon[1],
                              'Longitude': neighborhood_latlon[0]})

# Passing `columns` keeps the expected schema even when there are no features.
nyc = pd.DataFrame(neighborhood_rows, columns=column_names)

In [46]:
# Preview the neighbourhood table built from the downloaded JSON features.
nyc.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [47]:
# One row per (Borough, Neighborhood) pair, located at the mean of its coordinates.
neighborhood_groups = nyc.groupby(by=['Borough', 'Neighborhood'])
nyc = neighborhood_groups.mean().reset_index()
nyc.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Allerton,40.865788,-73.859319
1,Bronx,Baychester,40.866858,-73.835798
2,Bronx,Bedford Park,40.870185,-73.885512
3,Bronx,Belmont,40.857277,-73.888452
4,Bronx,Bronxdale,40.852723,-73.861726


In [48]:
# Count the distinct boroughs in the table.
nyc['Borough'].nunique()

5

In [59]:
# Cluster the New York neighbourhoods by location and draw one coloured marker each.
N_NYC_CLUSTERS = 5  # number of k-means groups the neighbourhoods are split into

# Centre the map on the mean coordinate of all neighbourhoods.
nyc_map = folium.Map(location=[nyc['Latitude'].mean(), nyc['Longitude'].mean()], zoom_start=12)

# Feature matrix for k-means: one (latitude, longitude) row per neighbourhood.
X = nyc['Latitude']
Y = nyc['Longitude']
Z = np.stack((X, Y), axis=1)

# n_init is pinned to 10 (the long-standing default) so the clustering does not
# change on scikit-learn versions where the default became 'auto'.
kmeans = KMeans(n_clusters=N_NYC_CLUSTERS, n_init=10, random_state=0).fit(Z)

clusters = kmeans.labels_

# One evenly spaced viridis colour per cluster label, converted to hex strings.
colors_array = cm.viridis(np.linspace(0, 1, N_NYC_CLUSTERS))
viridis = [Colors.rgb2hex(i) for i in colors_array]

nyc['Cluster'] = clusters

for latitude, longitude, borough, cluster in zip(nyc['Latitude'], nyc['Longitude'], nyc['Borough'], nyc['Cluster']):
    label = folium.Popup(borough, parse_html=True)
    # Labels run 0..N-1, so they index the palette directly; the previous
    # `cluster - 1` made label 0 wrap around to the last colour.
    marker_color = viridis[int(cluster)]
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color=marker_color,
        fill_color=marker_color,
        fill=True,
        fill_opacity=0.7).add_to(nyc_map)

In [60]:
# Render the folium map of New York as the cell output.
nyc_map

In [74]:
# Preview the neighbourhood table, which now carries the `Cluster` label column.
nyc.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster
0,Bronx,Allerton,40.865788,-73.859319,4
1,Bronx,Baychester,40.866858,-73.835798,4
2,Bronx,Bedford Park,40.870185,-73.885512,4
3,Bronx,Belmont,40.857277,-73.888452,4
4,Bronx,Bronxdale,40.852723,-73.861726,4


In [76]:
# Adopt the London column names so both city tables share the same schema.
shared_column_names = {"Borough": "District", "Neighborhood": "Ward"}
nyc = nyc.rename(columns=shared_column_names)
nyc.head()

Unnamed: 0,District,Ward,Latitude,Longitude,Cluster
0,Bronx,Allerton,40.865788,-73.859319,4
1,Bronx,Baychester,40.866858,-73.835798,4
2,Bronx,Bedford Park,40.870185,-73.885512,4
3,Bronx,Belmont,40.857277,-73.888452,4
4,Bronx,Bronxdale,40.852723,-73.861726,4


In [77]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping
        A venue record indexable by key (e.g. a dict or a DataFrame row).
        The category list is read from ``row['categories']``; if that key
        is missing, ``row['venue.categories']`` is used instead.

    Returns
    -------
    str or None
        ``name`` of the first category, or ``None`` when the category list
        is empty or ``None``.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; a bare `except` would also
        # hide unrelated errors (and even KeyboardInterrupt).
        categories_list = row['venue.categories']

    # Covers both an empty list and a missing (None) value.
    if not categories_list:
        return None
    return categories_list[0]['name']

In [79]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel; if they are unset the
# values are empty strings and the API is expected to reject the requests.
# NOTE(review): the key pair that used to be hard-coded here has been exposed
# and should be revoked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20181020'  # sent as the `v` query parameter of every request

In [82]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000, LIMIT=100, timeout=10):
    """Collect the venues returned by the Foursquare explore endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel sequences giving a label and a coordinate pair per location;
        they are consumed together with ``zip``.
    radius : int, default 1000
        Value sent as the ``radius`` query parameter.
    LIMIT : int, default 100
        Value sent as the ``limit`` query parameter.
    timeout : float, default 10
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns ``Neighborhood``,
        ``Neighborhood Latitude``, ``Neighborhood Longitude``, ``Venue``,
        ``Venue Latitude``, ``Venue Longitude`` and ``Venue Category``.
        The frame is empty, but keeps these columns, when nothing is found.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts or a non-2xx HTTP status.
    KeyError, IndexError
        If the response body does not contain ``response.groups[0].items``.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and escape the query string instead of formatting
        # the URL by hand.
        query = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; fail with a clear HTTP error instead of an
        # opaque KeyError when the API rejects the call
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=query,
                                timeout=timeout)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue without any category used to raise IndexError.
                categories[0]['name'] if categories else None))

    # Passing `columns` here (rather than assigning them afterwards) keeps the
    # call valid when no venue was collected at all.
    return pd.DataFrame(venue_rows, columns=columns)

In [83]:
# Query the venues around every New York row, labelled by its `Ward` value.
nyc_venues = getNearbyVenues(
    names=nyc['Ward'],
    latitudes=nyc['Latitude'],
    longitudes=nyc['Longitude'],
    radius=1000,
)
nyc_venues.head()

ConnectionError: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?&client_id=<REDACTED>&client_secret=<REDACTED>&v=20181020&ll=40.86578787802982,-73.85931863221647&radius=1000&limit=100 (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x2838C7B0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))