<h1>Capstone Project - Segmenting Hospitals in Greater London based on distance</h1>

<h3>Import all the necessary libraries</h3>

In [3]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.0.2p             |       h470a237_1         3.1 MB  conda-forge
    certifi-2018.10.15         |        py36_1000         138 KB  conda-forge
    geopy-1.17.0               |             py_0          49 KB  conda-forge
    ca-certificates-2018.10.15 |       ha4d7672_0         135 KB  conda-forge
    conda-4.5.11               |        py36_1000         651 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.1 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.49-py_0            conda-forge
    geopy:           

<h1>List all the hospitals in Greater London, UK</h1>

<h3>Connect FourSquare API to extract the details</h3>

In [27]:
CLIENT_ID = 'ZVPHYUXAOWDL1WASNXQZ3DOEHFZ4DTO2HFO0REQSPZXU0PDV'
CLIENT_SECRET = 'MEVWT2NFOQN3RPHM1IX0G3CGUQUQCTNQ3FCCYNJWRZ4AHM2Q'
VERSION = '20180605'
LIMIT=3000
radius=100000
address = 'London'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = 51.5074
longitude = -0.1278
query = 'hospital'
fsurl = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&query={}&radius={}&limit={}'.format(CLIENT_ID,
                                                                                                                           CLIENT_SECRET,
                                                                                                                           VERSION,
                                                                                                                          latitude,
                                                                                                                          longitude,query,LIMIT,radius)
results = requests.get(fsurl).json()
results



{'meta': {'code': 200, 'requestId': '5bcd79051ed2194285da4f5b'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'London',
  'headerFullLocation': 'London',
  'headerLocationGranularity': 'city',
  'query': 'hospital',
  'totalResults': 52,
  'suggestedBounds': {'ne': {'lat': 51.534400027000025,
    'lng': -0.08450143318886119},
   'sw': {'lat': 51.48039997299997, 'lng': -0.1710985668111388}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ac518f6f964a5205eaf20e3',
       'name': "St Thomas' Hospital",
       'location': {'address': 'Westminster Bridge Rd',
        'crossStreet': 'Westminster Bridge Road',
        'lat': 51.49971047720161,
        'lng': -0.11890384052741654,
    

<h3>List down all the hospitals with distance from Greater London</h3>

In [47]:
venues = results['response']['groups'][0]['items']
hospitals = json_normalize(venues)
columns = ['venue.name','venue.location.address','venue.location.city','venue.location.distance', 'venue.location.lat', 'venue.location.lng']
hospitals =hospitals.loc[:, columns]
hospitals.columns = [col.split(".")[-1] for col in hospitals.columns]
#hospitals.sort_values('distance',inplace=True,ascending=False)
#hospitals.reset_index()
hospitals

Unnamed: 0,name,address,city,distance,lat,lng
0,St Thomas' Hospital,Westminster Bridge Rd,London,1054,51.49971,-0.118904
1,Evelina London Children's Hospital,"St Thomas' Hospital, Westminster Bridge Rd",London,1052,51.499773,-0.118814
2,London Gynaecologist,108 Harley Street,London,101,51.506992,-0.129112
3,Great Ormond Street Hospital For Children,Great Ormond St,London,1701,51.522,-0.120526
4,St Thomas' Hand Therapy,Westminster Bridge Road London SE1 7EH,London,1077,51.500675,-0.116616
5,Lambeth Wing,,,1078,51.499886,-0.11797
6,National Hospital for Neurology and Neurosurgery,Queen Square,Bloomsbury,1718,51.522363,-0.121715
7,South Wing,,,1203,51.497849,-0.119673
8,Royal London Hospital For Integrated Medicine,,,1645,51.521613,-0.121253
9,South Westminster Centre for Health,"St Georges House, 82 Vincent Sq.",London,1507,51.494433,-0.134097


<h3>K-means Algorithm and clusters map</h3>

In [66]:
#Run k-means 

# set number of clusters
kclusters = 4

grouped_clustering = hospitals.drop(['name','address','city','lat','lng'],axis=1,inplace=False)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# create a new dataframe that includes the cluster as well .
london_merged = hospitals

# add clustering labels
london_merged['Cluster Labels'] = kmeans.labels_
london_merged

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lng, name, postalcode,cluster in zip(hospitals['lat'], hospitals['lng'], hospitals['name'],hospitals['city'], hospitals['Cluster Labels']):
    label = folium.Popup(str(name) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h3>Cluster Details</h3>

In [71]:
london_merged.loc[london_merged['Cluster Labels'] == 0, london_merged.columns[[1] + list(range(3, london_merged.shape[1]))]]

Unnamed: 0,address,distance,lat,lng,Cluster Labels
3,Great Ormond St,1701,51.522,-0.120526,0
6,Queen Square,1718,51.522363,-0.121715,0
8,,1645,51.521613,-0.121253,0
9,"St Georges House, 82 Vincent Sq.",1507,51.494433,-0.134097,0
12,,1656,51.521723,-0.121334,0
13,10 Harley Street,1680,51.517402,-0.145966,0
14,Lambs Coundit,1773,51.522266,-0.118597,0
15,Huntley Street,1824,51.523142,-0.13512,0
16,27 Welbeck Street,1918,51.518098,-0.149504,0
17,Huntley Street,1930,51.523816,-0.136768,0


In [72]:
london_merged.loc[london_merged['Cluster Labels'] == 1, london_merged.columns[[1] + list(range(3, london_merged.shape[1]))]]

Unnamed: 0,address,distance,lat,lng,Cluster Labels
32,Gray's Inn Road,2508,51.529233,-0.11886,1
39,Great Maze Pond,2815,51.503255,-0.08772,1
40,Royal Hospital Rd.,2957,51.487918,-0.15681,1
42,27 Tooley St.,2885,51.505855,-0.086231,1
43,,2814,51.504209,-0.087503,1
44,Chelsea Bridge Rd.,2816,51.486351,-0.150352,1
45,,2818,51.503239,-0.087683,1
46,,2877,51.503645,-0.086715,1
47,153-173 Marylebone Rd,2892,51.521149,-0.163224,1


In [73]:
london_merged.loc[london_merged['Cluster Labels'] == 2, london_merged.columns[[1] + list(range(3, london_merged.shape[1]))]]

Unnamed: 0,address,distance,lat,lng,Cluster Labels
0,Westminster Bridge Rd,1054,51.49971,-0.118904,2
1,"St Thomas' Hospital, Westminster Bridge Rd",1052,51.499773,-0.118814,2
2,108 Harley Street,101,51.506992,-0.129112,2
4,Westminster Bridge Road London SE1 7EH,1077,51.500675,-0.116616,2
5,,1078,51.499886,-0.11797,2
7,,1203,51.497849,-0.119673,2
48,22 Endell St,836,51.514646,-0.124586,2
49,St Thomas Hospital,1082,51.499786,-0.118077,2


In [74]:
london_merged.loc[london_merged['Cluster Labels'] == 3, london_merged.columns[[1] + list(range(3, london_merged.shape[1]))]]

Unnamed: 0,address,distance,lat,lng,Cluster Labels
10,235 Euston Road,2047,51.525004,-0.136339,3
11,W Smithfield,2274,51.517863,-0.099607,3
18,42/46 Weymouth Street,2127,51.52018,-0.150633,3
19,Grafton street,1976,51.524536,-0.135261,3
20,14 Fitzroy Square,1982,51.523391,-0.140387,3
21,250 Euston Road,1988,51.524584,-0.135633,3
22,5 Beaumont Street,2166,51.520807,-0.15047,3
23,95 Harley Street,2021,51.520614,-0.147822,3
24,45 Bolsover St.,2029,51.522938,-0.143129,3
25,16-18 Westmoreland St,2038,51.519508,-0.149867,3
