# 4. Showing cities with clustering on a map

## 1. Imports and such things

Use pandas to handle dataframes

In [114]:
import pandas as pd

Use folium to create pretty maps

In [115]:
import folium
from folium import plugins

Import matplotlib for handling some color-related things

In [116]:
import matplotlib.cm as cm
import matplotlib.colors as colors

Import numpy for some mathematics

In [117]:
import numpy as np

## 2. Import dataframes from notebook 3

This dataframe contains cities with their nation, population, Wikipedia URL, latitude and longitude, along with their most common venue categories and the cluster label determined in notebook 3.

In [118]:
# Load the city table from the pickle file in the working directory.
# NOTE: unpickling can execute arbitrary code, so only load pickle files you trust.
df_cities2 = pd.read_pickle('cities2.pickle')
# Show the first rows as a quick sanity check.
df_cities2.head()

Unnamed: 0,City,Nation,Common Venue 1,Common Venue 2,Common Venue 3,Common Venue 4,Common Venue 5,Common Venue 6,Common Venue 7,Common Venue 8,Common Venue 9,Common Venue 10,Cluster label,Population,URL,Latitude,Longitude
0,Abidjan,Ivory Coast,Shopping Mall,Ice Cream Shop,Italian Restaurant,African Restaurant,Supermarket,Pharmacy,Bakery,Bar,Music Venue,Pizza Place,9,4765000,https://en.wikipedia.org/wiki/Abidjan,5.316667,-4.033333
1,Abu Dhabi,United Arab Emirates,Café,Beach,Middle Eastern Restaurant,Restaurant,Indian Restaurant,Hotel,Golf Course,Italian Restaurant,Bakery,Filipino Restaurant,3,1145000,https://en.wikipedia.org/wiki/Abu_Dhabi,24.466667,54.366667
2,Abuja,Nigeria,Shopping Mall,Fast Food Restaurant,Lounge,African Restaurant,Department Store,Restaurant,Indian Restaurant,Fried Chicken Joint,Pizza Place,Pharmacy,9,1235880,https://en.wikipedia.org/wiki/Abuja,9.066667,7.483333
3,Accra,Ghana,Shopping Mall,Café,Restaurant,American Restaurant,Beach,Grocery Store,Movie Theater,Chinese Restaurant,Snack Place,Coffee Shop,9,2070463,https://en.wikipedia.org/wiki/Accra,5.55,-0.2
4,Addis Ababa,Ethiopia,Italian Restaurant,Ethiopian Restaurant,Pizza Place,Grocery Store,Hotel,Greek Restaurant,American Restaurant,Restaurant,Massage Studio,Fast Food Restaurant,5,3103673,https://en.wikipedia.org/wiki/Addis_Ababa,9.03,38.74


## 3. Putting the clusters on the map

How many cluster labels did we create in notebook 3?

In [119]:
# Count the distinct values in the 'Cluster label' column.
kclusters = df_cities2['Cluster label'].nunique()
print ("We have {} different clusters of cities".format(kclusters))

We have 16 different clusters of cities


Create a color table for these clusters

In [120]:
# Set the colour scheme for the clusters: sample `kclusters` evenly spaced
# points of matplotlib's rainbow colormap and convert each RGBA entry to a hex
# string, giving one colour per cluster.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

Now actually put the cities on the map with a color depending on their cluster.

In [121]:
# World map centred on latitude 0 / longitude 0 at a low zoom level
map_world = folium.Map(location=[0, 0], zoom_start=2)

# Attach the fullscreen control from folium.plugins
fs = plugins.Fullscreen()
map_world.add_child(fs)

# Walk the coordinate, name and cluster columns in parallel: one marker per city
city_rows = zip(
    df_cities2['Latitude'],
    df_cities2['Longitude'],
    df_cities2['City'],
    df_cities2['Nation'],
    df_cities2['Cluster label'],
)
for lat, lng, city, nation, cluster in city_rows:
    # Outline and fill share the colour looked up from the cluster label
    cluster_color = rainbow[cluster-1]
    label = folium.Popup('{}, {}'.format(city, nation))
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7
    )
    marker.add_to(map_world)

# Last expression of the cell: render the map inline
map_world

## 4. Adding more information to the map

Let's see if we can add some useful information to each of the cities.

I want to create a popup along the following lines:

**City, Nation (population xxxx)**
link to wikipedia page

**Top 10 venue types in this city**
- category 1
- category 2

**top 10 venues in this city**
- venue 1
- venue 2

**Similar cities**
all cities with same cluster label




We can read the first few bits of information from our df_cities2 dataframe, but not the top 10 venues or the similar cities.

### 4.1 Find similar cities

The following code makes a list of strings, one per city, for each cluster.

Then it joins this list into the cities dataframe

In [122]:
def make_city_list(cities, nations):
    """Return one "City (Nation)" string per (city, nation) pair.

    The two iterables are walked in parallel with ``zip``, so the result is as
    long as the shorter of the two.
    """
    return ["{} ({})".format(city, nation) for city, nation in zip(cities, nations)]

In [123]:
# For every cluster label, gather the "City (Nation)" strings of its cities,
# then wrap the result in a one-column frame named 'Similar'.
similar_by_cluster = df_cities2.groupby('Cluster label').apply(
    lambda group: make_city_list(group['City'], group['Nation'])
)
clusters = pd.DataFrame(similar_by_cluster, columns=['Similar'])

In [124]:
# Attach the 'Similar' list to every city through its cluster label.
df_cities3 = df_cities2.merge(clusters, on=['Cluster label'])

### 4.2 Find top 10 venues

For this we need to re-import the venues table from notebook 1.

Foursquare already returned results in order of recommendation, so we just take the first ten of every city.

We create a list of those top 10 and merge it into the cities dataframe

In [125]:
# Load the venue table from the pickle file in the working directory.
# NOTE: unpickling can execute arbitrary code, so only load pickle files you trust.
df_venues = pd.read_pickle('venues.pickle')
# Keep the first ten rows of every (City, Nation) group, in their stored order.
# `head(10)` returns whole rows (grouping columns included) and really yields
# ten venues per city; selecting positions 0..8 gave only nine.
df_top_venues = df_venues.groupby(['City', 'Nation']).head(10)

In [144]:
def create_venue_tuple(x):
    """Turn the venue columns of ``x`` into a list of record dicts.

    ``x`` must support ``x['Venue']``, ``x['Venue Category']`` and
    ``x['Venue ID']``; the three are read in parallel and each triple becomes
    ``{"Venue": ..., "Category": ..., "Id": ...}``.
    """
    return [
        {"Venue": name, "Category": category, "Id": venue_id}
        for name, category, venue_id in zip(x['Venue'], x['Venue Category'], x['Venue ID'])
    ]

In [145]:
# Collapse each city's rows into a single list of venue records, giving one
# entry per (City, Nation) pair. The default as_index=True is used on purpose:
# the result is then indexed by City/Nation, which the later merge on
# ['City', 'Nation'] relies on.
# NOTE(review): as_index=False combined with a list-returning apply appears to
# behave differently across pandas versions - confirm against the version in use.
top_venues_by_city = df_top_venues.groupby(['City', 'Nation']).apply(create_venue_tuple)
venues = pd.DataFrame(top_venues_by_city, columns=['Top Venues'])
venues.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Top Venues
City,Nation,Unnamed: 2_level_1
Abidjan,Ivory Coast,"[{'Venue': 'Hollywood Boulevard', 'Category': ..."
Abu Dhabi,United Arab Emirates,"[{'Venue': 'Beach Rotana Abu Dhabi', 'Category..."
Abuja,Nigeria,"[{'Venue': 'Dunes', 'Category': 'Department St..."
Accra,Ghana,"[{'Venue': 'The Honeysuckle', 'Category': 'Bar..."
Addis Ababa,Ethiopia,"[{'Venue': 'Beer garden', 'Category': 'Brewery..."


In [146]:
# Add the 'Top Venues' list to each city, matching on city and nation.
df_cities3 = df_cities3.merge(venues, on=['City', 'Nation'])

### 4.3 Clean up most common categories

We would also like to have the most common categories in list form, instead of separate columns.

In [147]:
# Fold the ten 'Common Venue N' columns into one list-valued 'Categories' column.
# The columns are picked by name rather than by position, so the cell does not
# depend on column order. The guard makes a re-run a no-op instead of dropping
# whatever columns happen to sit at the same positions.
category_columns = [col for col in df_cities3.columns if str(col).startswith('Common Venue')]
if category_columns:
    df_cities3['Categories'] = df_cities3[category_columns].values.tolist()
    df_cities3 = df_cities3.drop(columns=category_columns)

In [148]:
# Preview the first rows of the reshaped city table.
df_cities3.head()

Unnamed: 0,City,Nation,Cluster label,Population,URL,Latitude,Longitude,Similar,Top Venues,Categories
0,Abidjan,Ivory Coast,9,4765000,https://en.wikipedia.org/wiki/Abidjan,5.316667,-4.033333,"[Abidjan (Ivory Coast), Abuja (Nigeria), Accra...","[{'Venue': 'Hollywood Boulevard', 'Category': ...","[Shopping Mall, Ice Cream Shop, Italian Restau..."
1,Abuja,Nigeria,9,1235880,https://en.wikipedia.org/wiki/Abuja,9.066667,7.483333,"[Abidjan (Ivory Coast), Abuja (Nigeria), Accra...","[{'Venue': 'Dunes', 'Category': 'Department St...","[Shopping Mall, Fast Food Restaurant, Lounge, ..."
2,Accra,Ghana,9,2070463,https://en.wikipedia.org/wiki/Accra,5.55,-0.2,"[Abidjan (Ivory Coast), Abuja (Nigeria), Accra...","[{'Venue': 'The Honeysuckle', 'Category': 'Bar...","[Shopping Mall, Café, Restaurant, American Res..."
3,Algiers,Algeria,9,3415811,https://en.wikipedia.org/wiki/Algiers,36.753889,3.058889,"[Abidjan (Ivory Coast), Abuja (Nigeria), Accra...","[{'Venue': 'Restaurant Le Thyrolien', 'Categor...","[Restaurant, Café, Burger Joint, Seafood Resta..."
4,Basra,Iraq,9,2750000,https://en.wikipedia.org/wiki/Basra,30.5,47.816667,"[Abidjan (Ivory Coast), Abuja (Nigeria), Accra...","[{'Venue': 'AlKhaima (الخيمة)', 'Category': 'M...","[Clothing Store, Middle Eastern Restaurant, Ma..."


### 4.4. Create HTML description for each city

In [157]:
def create_label(x):
    """Build the HTML popup text for one city.

    Parameters
    ----------
    x : mapping-like row (for example one row of a DataFrame)
        Must provide 'City', 'Nation', 'Population', 'Latitude', 'Longitude',
        'URL', 'Categories' (iterable of category names), 'Top Venues'
        (iterable of dicts with 'Id', 'Venue' and 'Category' keys),
        'Cluster label' and 'Similar' (iterable of city strings).

    Returns
    -------
    str
        An HTML fragment: a heading, population, coordinates, a Wikipedia
        link, two ordered lists (categories and venues) and a closing
        paragraph listing the similar cities.
    """
    # Function-scope import so this cell does not depend on the import cells.
    from html import escape

    def text(value):
        # Escape &, <, > and quotes so names cannot break the markup.
        return escape(str(value))

    parts = [
        "<h1>{}, {}</h1>\n".format(text(x['City']), text(x['Nation'])),
        "<p>Population: {}</p>\n".format(x['Population']),
        "<p>Latitude: {},<br/> Longitude: {}</p>\n".format(x['Latitude'], x['Longitude']),
        "<p><a href=\"{}\">Wikipedia link</a></p>\n".format(text(x['URL'])),
    ]

    parts.append("<p>Most common venue categories:</p>\n<ol>\n")
    parts.extend("<li>{}</li>\n".format(text(category)) for category in x['Categories'])
    parts.append("</ol>\n")

    parts.append("<p>Most popular venues:</p>\n<ol>\n")
    parts.extend(
        "<li><a href=\"https://foursquare.com/v/{}\">{}</a> - {}</li>\n".format(
            text(venue['Id']), text(venue['Venue']), text(venue['Category'])
        )
        for venue in x['Top Venues']
    )
    parts.append("</ol>\n")

    parts.append("<p>Similar cities [cluster {}]: ".format(x['Cluster label']))
    parts.append(", ".join(text(city) for city in x['Similar']))
    # Close the paragraph properly (a bare "p" used to be appended here).
    parts.append("</p>\n")

    return "".join(parts)

In [158]:
# Build the popup HTML row by row (axis=1 passes each row to create_label).
df_cities3["label"] = df_cities3.apply(create_label, axis=1)

## 5. Display the map again

In [159]:
# World map centred on latitude 0 / longitude 0 at a low zoom level
map_world = folium.Map(location=[0, 0], zoom_start=2)

# Attach the fullscreen control from folium.plugins
fs = plugins.Fullscreen()
map_world.add_child(fs)

# Walk the coordinate, name, cluster and popup-text columns in parallel
city_rows = zip(
    df_cities3['Latitude'],
    df_cities3['Longitude'],
    df_cities3['City'],
    df_cities3['Nation'],
    df_cities3['Cluster label'],
    df_cities3['label'],
)
for lat, lng, city, nation, cluster, label in city_rows:
    # Outline and fill share the colour looked up from the cluster label
    cluster_color = rainbow[cluster-1]
    # The prepared HTML text becomes the popup, capped at 600 px wide
    label = folium.Popup(label, max_width=600)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7
    )
    marker.add_to(map_world)

# Write a standalone HTML copy, then render the map inline
map_world.save("clustermap.html")
map_world