# 4. Showing cities with clustering on a map

## 1. Imports and such things

Use pandas to handle dataframes

In [1]:
import pandas as pd

Use folium to create pretty maps

In [2]:
import folium
from folium import plugins

Import matplotlib for handling some color-related things

In [3]:
import matplotlib.cm as cm
import matplotlib.colors as colors

Import numpy for some mathematics

In [4]:
import numpy as np

## 2. Import dataframes from notebook 3

This dataframe contains the cities with their province, population, Wikipedia URL, latitude and longitude, along with their most common venue categories and the cluster label determined in notebook 3.

In [5]:
# Load the cities table that was saved as a pickle and preview it.
# NOTE(review): unpickling can execute code stored in the file, so only
# load pickle files you created yourself.
cities_pickle_path = 'cities2.pickle'
df_cities2 = pd.read_pickle(cities_pickle_path)
df_cities2.head(5)

Unnamed: 0,City,Common Venue 1,Common Venue 2,Common Venue 3,Common Venue 4,Common Venue 5,Cluster label,Province,Population,Area,Density,Income,URL,FlagURL,Latitude,Longitude
0,'s-Hertogenbosch (gemeente),Bar,Café,Restaurant,Coffee Shop,Mediterranean Restaurant,0,Noord-Brabant,153434,84.63,1783.7,13900,https://nl.wikipedia.org/wiki/%27s-Hertogenbos...,https://nl.wikipedia.org/wiki/Bestand:Flag_of_...,51.689167,5.303056
1,Aa en Hunze,Restaurant,Resort,Park,Diner,Hotel,3,Drenthe,25319,276.35,91.2,13400,https://nl.wikipedia.org/wiki/Aa_en_Hunze,https://nl.wikipedia.org/wiki/Bestand:Flag_of_...,53.0,6.75
2,Aalburg,Restaurant,Diner,Supermarket,Ice Cream Shop,Café,2,Noord-Brabant,13191,50.4,257.5,12500,https://nl.wikipedia.org/wiki/Aalburg,https://nl.wikipedia.org/wiki/Bestand:Flag_of_...,51.7525,5.131944
3,Aalsmeer,Harbor / Marina,Gym / Fitness Center,Supermarket,Restaurant,Athletics & Sports,4,Noord-Holland,31584,20.58,1512.7,14700,https://nl.wikipedia.org/wiki/Aalsmeer,https://nl.wikipedia.org/wiki/Bestand:Aalsmeer...,52.263889,4.7625
4,Aalten (gemeente),Campground,Supermarket,Ice Cream Shop,Restaurant,Gym,3,Gelderland,26943,96.57,278.7,11900,https://nl.wikipedia.org/wiki/Aalten_(gemeente),https://nl.wikipedia.org/wiki/Bestand:Flag_of_...,51.925,6.580833


## 3. Putting the clusters on the map

How many cluster labels did we create in notebook 3?

In [6]:
# Number of distinct values in the 'Cluster label' column.
kclusters = (
    df_cities2['Cluster label']
    .nunique()
)
print(
    "We have {} different clusters of cities".format(kclusters)
)

We have 7 different clusters of cities


Create a color table for these clusters

In [7]:
# Build one hex colour per cluster by sampling the rainbow colormap at
# kclusters evenly spaced points between 0 and 1.
# (The former helper arrays `x` and `ys` were only used for their length,
# which always equals kclusters, so the count is used directly.)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

Now actually put the cities on the map with a color depending on their cluster.

In [8]:
# Base map centred on a fixed latitude/longitude, with a fullscreen control.
map_brabant = folium.Map(location=[51.916667, 5.566667], zoom_start=10)
map_brabant.add_child(plugins.Fullscreen())

# One fixed-size circle marker per city, coloured by its cluster label and
# showing the city name in its popup.
marker_columns = df_cities2[['Latitude', 'Longitude', 'City', 'Cluster label']]
for lat, lng, city, cluster in marker_columns.itertuples(index=False, name=None):
    # The palette lookup uses cluster - 1; for label 0 this resolves to the
    # last palette entry through Python's negative indexing.
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup('{}'.format(city)),
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7
    ).add_to(map_brabant)

map_brabant

## 4. Adding more information to the map

Let's see if we can add some useful information to each of the cities.

I want to create a popup along the following lines:

**City, Nation (population xxxx)**
link to wikipedia page

**Top 10 venue types in this city**
- category 1
- category 2

**top 10 venues in this city**
- venue 1
- venue 2

**Similar cities**
all cities with same cluster label




We can read the first few bits of information from our df_cities2 dataframe, but not the top 10 venues or the similar cities.

### 4.1 Find similar cities

The following code builds, for each cluster, a single comma-separated string listing the cities in that cluster.

Then it joins this list into the cities dataframe

In [9]:
def make_city_list(cities, nations=None):
    """Format city names for display, optionally tagged with their nation.

    Parameters
    ----------
    cities : iterable
        City names.
    nations : iterable, optional
        One nation per city, paired positionally with ``cities``. When
        omitted, each city is rendered on its own. (Previously the function
        read an undefined global ``nations`` and raised ``NameError``.)

    Returns
    -------
    list of str
        ``"City (Nation)"`` for each pair when ``nations`` is given (pairing
        stops at the shorter input, as with ``zip``), otherwise ``"City"``.
    """
    if nations is None:
        return ["{}".format(city) for city in cities]
    return ["{} ({})".format(city, nation) for city, nation in zip(cities, nations)]

In [10]:
# For every cluster label, join the names of its cities into one
# comma-separated string, stored in a single 'Similar' column indexed by
# 'Cluster label'.
# The 'City' column is selected before aggregating so the join runs on the
# names only, instead of applying a lambda to each whole group frame.
clusters = (
    df_cities2
    .groupby('Cluster label')['City']
    .agg(", ".join)
    .to_frame(name='Similar')
)
clusters

Unnamed: 0_level_0,Similar
Cluster label,Unnamed: 1_level_1
0,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd..."
1,"Baarle-Nassau, Bernheze, Cranendonck, Deventer..."
2,"Aalburg, Baarn, Berg en Dal (gemeente), Bergei..."
3,"Aa en Hunze, Aalten (gemeente), Achtkarspelen,..."
4,"Aalsmeer, Alblasserdam, Albrandswaard, Alkmaar..."
5,"Ameland (hoofdbetekenis), Gulpen-Wittem, Holla..."
6,"Leudal (gemeente), Maastricht, Roerdalen, Roer..."


In [11]:
# Attach each cluster's 'Similar' string to every city row, matching on the
# cluster label.
df_cities3 = df_cities2.merge(clusters, on=['Cluster label'], how='inner')

### 4.2 Find top 10 venues

For this we need to re-import the venues table from notebook 1.

Foursquare already returned results in order of recommendation, so we just take the first few venues of every city (the cell below keeps the first five).

We create a list of those top venues for each city and merge it into the cities dataframe.

In [12]:
# Load the venues table, then keep only the rows at positions 0-4 within
# each 'City' group (i.e. the first five venues listed for every city).
df_venues = pd.read_pickle('venues.pickle')
first_five_positions = list(range(5))
venues_by_city = df_venues.groupby(['City'], as_index=False)
df_top_venues = venues_by_city.nth(first_five_positions, dropna=None)

In [13]:
def create_venue_tuple(x):
    """Turn one group's venue columns into a list of per-venue dicts.

    Reads the 'Venue', 'Venue Category' and 'Venue ID' entries of ``x`` in
    parallel and returns one dict with the keys "Venue", "Category" and "Id"
    per row, in the original row order.
    """
    venue_rows = zip(x['Venue'], x['Venue Category'], x['Venue ID'])
    return [
        {"Venue": name, "Category": category, "Id": venue_id}
        for name, category, venue_id in venue_rows
    ]

In [14]:
# Collapse each city's selected venues into one list of dicts, giving a
# one-column frame ('Top Venues') indexed by city.
top_venue_lists = df_top_venues.groupby(['City'], as_index=True).apply(create_venue_tuple)
venues = pd.DataFrame(top_venue_lists, columns=['Top Venues'])
venues.head(5)

Unnamed: 0_level_0,Top Venues
City,Unnamed: 1_level_1
's-Hertogenbosch (gemeente),"[{'Venue': 'Parade', 'Category': 'Pedestrian P..."
Aa en Hunze,"[{'Venue': 'Hof van Saksen', 'Category': 'Reso..."
Aalburg,"[{'Venue': 'Jumbo', 'Category': 'Supermarket',..."
Aalsmeer,"[{'Venue': 'Westeinder Paviljoen', 'Category':..."
Aalten (gemeente),"[{'Venue': 'Vakantiepark De Twee Bruggen', 'Ca..."


In [15]:
# Add the 'Top Venues' list to every city row, matching on the city name.
df_cities3 = df_cities3.merge(venues, on=['City'], how='inner')

### 4.3 Clean up most common categories

We would also like to have the most common categories in a list form, instead of separate columns

In [16]:
# Gather the 'Common Venue N' columns into one list-valued 'Categories'
# column and drop the originals.
# The columns are picked by name rather than by position, and the step is
# skipped when they are already gone, so re-running this cell cannot consume
# and drop unrelated columns.
common_venue_columns = [
    column for column in df_cities3.columns
    if str(column).startswith('Common Venue')
]
if common_venue_columns:
    df_cities3['Categories'] = df_cities3[common_venue_columns].values.tolist()
    df_cities3 = df_cities3.drop(common_venue_columns, axis=1)

In [17]:
# Preview the first five rows of the reshaped cities table.
df_cities3.head(5)

Unnamed: 0,City,Cluster label,Province,Population,Area,Density,Income,URL,FlagURL,Latitude,Longitude,Similar,Top Venues,Categories
0,'s-Hertogenbosch (gemeente),0,Noord-Brabant,153434,84.63,1783.7,13900,https://nl.wikipedia.org/wiki/%27s-Hertogenbos...,https://nl.wikipedia.org/wiki/Bestand:Flag_of_...,51.689167,5.303056,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd...","[{'Venue': 'Parade', 'Category': 'Pedestrian P...","[Bar, Café, Restaurant, Coffee Shop, Mediterra..."
1,Amersfoort (hoofdbetekenis),0,Utrecht,155614,62.88,2429.3,13900,https://nl.wikipedia.org/wiki/Amersfoort_(hoof...,https://nl.wikipedia.org/wiki/Bestand:Amersfoo...,52.156389,5.389722,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd...","[{'Venue': 'Vlaams Friteshuis van Gogh', 'Cate...","[Restaurant, Café, Snack Place, Ice Cream Shop..."
2,Amsterdam (gemeente),0,Noord-Holland,859732,165.34,4990.2,13900,https://nl.wikipedia.org/wiki/Amsterdam_(gemee...,https://nl.wikipedia.org/wiki/Bestand:Flag_of_...,52.363022,4.88205,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd...","[{'Venue': 'Apple Store', 'Category': 'Electro...","[Café, Art Museum, Coffee Shop, Restaurant, Sa..."
3,Apeldoorn (gemeente),0,Gelderland,161569,339.96,465.4,13400,https://nl.wikipedia.org/wiki/Apeldoorn_(gemee...,https://nl.wikipedia.org/wiki/Bestand:Apeldoor...,52.216667,5.95,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd...",[{'Venue': 'Finnegan's Irish pub & restaurant'...,"[Café, Park, Ice Cream Shop, French Restaurant..."
4,Arnhem,0,Gelderland,157660,98.25,1552.2,12800,https://nl.wikipedia.org/wiki/Arnhem,https://nl.wikipedia.org/wiki/Bestand:VlagArnh...,51.983333,5.916667,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd...","[{'Venue': 'TAPE', 'Category': 'Café', 'Id': '...","[Restaurant, Café, Coffee Shop, Bar, Fast Food..."


### 4.4. Create HTML description for each city

In [18]:
def create_label(x):
    """Build the HTML fragment shown in a city's map popup.

    Parameters
    ----------
    x : mapping (for example a DataFrame row)
        Must provide 'City', 'Latitude', 'Longitude', 'URL', 'Categories'
        (iterable of category names), 'Top Venues' (iterable of dicts with
        'Id', 'Venue' and 'Category'), 'Cluster label' and 'Similar'.

    Returns
    -------
    str
        A heading with the city name, its coordinates, a Wikipedia link,
        an ordered list of categories, an ordered list of venue links and a
        paragraph listing the similar cities.

    Notes
    -----
    Names, categories and URLs come from external data, so they are
    HTML-escaped before being placed in the markup; a name containing
    ``&``, ``<`` or ``>`` would otherwise produce broken or injected HTML.
    """
    # Imported locally so the function carries its own dependency.
    from html import escape

    def as_text(value):
        # Escape &, < and > for element content; quotes are left untouched.
        return escape(str(value), quote=False)

    def as_attr(value):
        # Also escape quotes, for use inside a double-quoted attribute.
        return escape(str(value), quote=True)

    parts = [
        "<h1>{}</h1>\n".format(as_text(x['City'])),
        "<p>Latitude: {},<br/> Longitude: {}</p>\n".format(x['Latitude'], x['Longitude']),
        "<p><a href=\"{}\">Wikipedia link</a></p>\n".format(as_attr(x['URL'])),
        "<p>Most common venue categories:</p>\n<ol>\n",
    ]
    for category in x['Categories']:
        parts.append("<li>{}</li>\n".format(as_text(category)))
    parts.append("</ol>\n")

    parts.append("<p>Most popular venues:</p>\n<ol>\n")
    for venue in x['Top Venues']:
        # Backticks in venue names are replaced by apostrophes.
        venue_name = as_text(venue['Venue']).replace('`', '\'')
        parts.append(
            "<li><a href=\"https://foursquare.com/v/{}\">{}</a> - {}</li>\n".format(
                as_attr(venue['Id']), venue_name, as_text(venue['Category'])
            )
        )
    parts.append("</ol>\n")

    parts.append("<p>Similar cities [cluster {}]: ".format(x['Cluster label']))
    parts.append(as_text(x['Similar']))
    parts.append("</p>\n")

    return "".join(parts)

In [19]:
# Build the popup HTML for every city row and store it in a 'label' column.
city_labels = df_cities3.apply(create_label, axis=1)
df_cities3['label'] = city_labels

In [20]:
# Preview the first five rows, now including the 'label' column.
df_cities3.head(5)

Unnamed: 0,City,Cluster label,Province,Population,Area,Density,Income,URL,FlagURL,Latitude,Longitude,Similar,Top Venues,Categories,label
0,'s-Hertogenbosch (gemeente),0,Noord-Brabant,153434,84.63,1783.7,13900,https://nl.wikipedia.org/wiki/%27s-Hertogenbos...,https://nl.wikipedia.org/wiki/Bestand:Flag_of_...,51.689167,5.303056,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd...","[{'Venue': 'Parade', 'Category': 'Pedestrian P...","[Bar, Café, Restaurant, Coffee Shop, Mediterra...",<h1>'s-Hertogenbosch (gemeente)</h1>\n<p>Latit...
1,Amersfoort (hoofdbetekenis),0,Utrecht,155614,62.88,2429.3,13900,https://nl.wikipedia.org/wiki/Amersfoort_(hoof...,https://nl.wikipedia.org/wiki/Bestand:Amersfoo...,52.156389,5.389722,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd...","[{'Venue': 'Vlaams Friteshuis van Gogh', 'Cate...","[Restaurant, Café, Snack Place, Ice Cream Shop...",<h1>Amersfoort (hoofdbetekenis)</h1>\n<p>Latit...
2,Amsterdam (gemeente),0,Noord-Holland,859732,165.34,4990.2,13900,https://nl.wikipedia.org/wiki/Amsterdam_(gemee...,https://nl.wikipedia.org/wiki/Bestand:Flag_of_...,52.363022,4.88205,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd...","[{'Venue': 'Apple Store', 'Category': 'Electro...","[Café, Art Museum, Coffee Shop, Restaurant, Sa...",<h1>Amsterdam (gemeente)</h1>\n<p>Latitude: 52...
3,Apeldoorn (gemeente),0,Gelderland,161569,339.96,465.4,13400,https://nl.wikipedia.org/wiki/Apeldoorn_(gemee...,https://nl.wikipedia.org/wiki/Bestand:Apeldoor...,52.216667,5.95,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd...",[{'Venue': 'Finnegan's Irish pub & restaurant'...,"[Café, Park, Ice Cream Shop, French Restaurant...",<h1>Apeldoorn (gemeente)</h1>\n<p>Latitude: 52...
4,Arnhem,0,Gelderland,157660,98.25,1552.2,12800,https://nl.wikipedia.org/wiki/Arnhem,https://nl.wikipedia.org/wiki/Bestand:VlagArnh...,51.983333,5.916667,"'s-Hertogenbosch (gemeente), Amersfoort (hoofd...","[{'Venue': 'TAPE', 'Category': 'Café', 'Id': '...","[Restaurant, Café, Coffee Shop, Bar, Fast Food...","<h1>Arnhem</h1>\n<p>Latitude: 51.983333333333,..."


### 4.5 Create circle size based on population count

We want the circles on the map to have a radius of MINCIRCLE for the city with the smallest population, and MAXCIRCLE for the city with the largest population.

In [21]:
# Circle radii (folium.Circle takes its radius in metres) assigned to the
# least and the most populous city.
MINCIRCLE = 750    # radius for the city with the smallest population
MAXCIRCLE = 7000   # radius for the city with the largest population
# Population extremes that anchor the linear radius scale.
popmin, popmax = min(df_cities3['Population']), max(df_cities3['Population'])

def pop_radius(pop, pop_min=None, pop_max=None, min_radius=None, max_radius=None):
    """Map a population linearly onto a circle radius.

    Parameters
    ----------
    pop : number
        Population of the city to scale.
    pop_min, pop_max : number, optional
        Populations mapped to ``min_radius`` and ``max_radius``. Default to
        the notebook-level ``popmin`` and ``popmax``.
    min_radius, max_radius : number, optional
        Radii at the two ends of the scale. Default to the notebook-level
        ``MINCIRCLE`` and ``MAXCIRCLE``.

    Returns
    -------
    number
        ``min_radius`` for ``pop == pop_min``, ``max_radius`` for
        ``pop == pop_max`` and a linear interpolation in between. When
        ``pop_min == pop_max`` the scale is degenerate and ``min_radius`` is
        returned instead of dividing by zero.
    """
    # Fall back to the notebook globals only for arguments not supplied.
    if pop_min is None:
        pop_min = popmin
    if pop_max is None:
        pop_max = popmax
    if min_radius is None:
        min_radius = MINCIRCLE
    if max_radius is None:
        max_radius = MAXCIRCLE

    span = pop_max - pop_min
    if span == 0:
        # All cities share one population: nothing to interpolate over.
        return min_radius
    return min_radius + (max_radius - min_radius)*(pop - pop_min)/span
    

## 5. Display the map again

In [22]:
# df_tomap is a second name for df_cities3 (no copy is made); the map cell
# that follows reads its columns from df_tomap.
df_tomap = df_cities3

In [23]:
# Base map centred on a fixed latitude/longitude at zoom level 8, with a
# fullscreen control.
map_brabant = folium.Map(location=[51.916667, 5.566667], zoom_start=8)
map_brabant.add_child(plugins.Fullscreen())

# One circle per city: radius derived from its population, colour from its
# cluster label, popup showing the city's pre-built HTML label.
circle_columns = df_tomap[['Latitude', 'Longitude', 'City', 'Cluster label', 'label', 'Population']]
for lat, lng, city, cluster, label_html, pop in circle_columns.itertuples(index=False, name=None):
    # The palette lookup uses cluster - 1; for label 0 this resolves to the
    # last palette entry through Python's negative indexing.
    cluster_color = rainbow[cluster-1]
    folium.Circle(
        [lat, lng],
        radius=pop_radius(pop),
        popup=folium.Popup(label_html, max_width=600, parse_html=False),
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7
    ).add_to(map_brabant)

# Write the map to an HTML file, then display it inline.
map_brabant.save("clustermap.html")
map_brabant