A Knowledge-Based Approach to a Neighbourhood Recommendation System Using Spectral Clustering

---



---



In [None]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import dbscan from clustering stage
#from sklearn.cluster import DBSCAN

from sklearn.cluster import SpectralClustering

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [None]:
# Geocode the city itself; these coordinates are used later to centre the cluster map.
address = 'Munich'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None (it does not raise) when Nominatim finds no match,
# so fail with a readable message instead of an AttributeError on `.latitude`.
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))
latitude_munich = location.latitude
longitude_munich = location.longitude
print('The geograpical coordinate of Munich are {}, {}.'.format(latitude_munich, longitude_munich))

The geograpical coordinate of Munich are 48.1371079, 11.5753822.


In [None]:
# Download the district / postal-code table published on the Munich city portal.
url = 'https://www.muenchen.de/int/en/living/postal-codes.html'
# pd.read_html returns a list with one DataFrame per HTML table found at the URL
munich_data_list = pd.read_html(url)
# keep the first table; it carries the 'District' and 'Postal Code' columns used below
munich_data = munich_data_list[0]
munich_data

Unnamed: 0,District,Postal Code
0,Allach-Untermenzing,"80995, 80997, 80999, 81247, 81249"
1,Altstadt-Lehel,"80331, 80333, 80335, 80336, 80469, 80538, 80539"
2,Au-Haidhausen,"81541, 81543, 81667, 81669, 81671, 81675, 81677"
3,Aubing-Lochhausen-Langwied,"81243, 81245, 81249"
4,Berg am Laim,"81671, 81673, 81735, 81825"
5,Bogenhausen,"81675, 81677, 81679, 81925, 81927, 81929"
6,Feldmoching-Hasenbergl,"80933, 80935, 80995"
7,Hadern,"80689, 81375, 81377"
8,Laim,"80686, 80687, 80689"
9,Ludwigsvorstadt-Isarvorstadt,"80335, 80336, 80337, 80469"


In [None]:
# Empty frame with the target schema (one district and one single postal code per row);
# it is populated by a later cell.
munich_data_cleaned = pd.DataFrame(columns=['District', 'Postal Code'])
munich_data_cleaned.head()

Unnamed: 0,District,Postal Code


In [None]:
# Expand every comma-separated 'Postal Code' string into individual records, so that each
# record pairs one district with exactly one postal code (blanks inside the code removed).
items = [
    {'District': munich_data['District'][row_pos], 'Postal Code': raw_code.replace(' ', '')}
    for row_pos, code_string in enumerate(munich_data['Postal Code'])
    for raw_code in code_string.split(',')
]

In [None]:
# Build the cleaned frame in one go from the per-postal-code records in `items`.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0; constructing the
# frame directly also makes this cell idempotent (re-running it no longer duplicates rows).
munich_data_cleaned = pd.DataFrame(items, columns=['District', 'Postal Code'])
munich_data_cleaned.head()

Unnamed: 0,District,Postal Code
0,Allach-Untermenzing,80995
1,Allach-Untermenzing,80997
2,Allach-Untermenzing,80999
3,Allach-Untermenzing,81247
4,Allach-Untermenzing,81249


In [None]:
# Foursquare credentials and request settings.
# The credentials are read from the environment so that they are never stored in (or printed
# by) the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting
# the kernel.
import os  # local import: only this cell needs it

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20200410' # Foursquare API version
LIMIT = 100  # maximum number of venues requested per API call

# Fail early with an actionable message rather than with an API error several cells later.
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Foursquare credentials are missing: set the FOURSQUARE_CLIENT_ID and '
        'FOURSQUARE_CLIENT_SECRET environment variables.')

# Confirm that credentials were found without echoing their values into the cell output.
print('Foursquare credentials loaded from the environment.')

Your credentails:
CLIENT_ID: RTWMIKFZBGXUDJA4ZZ1DUE5FR1UVE0MOXIIPFYCBZC11BKBY
CLIENT_SECRET:0IFVY5ARZTKV0PITEFKTKS4G1V3A1LHEGPH5KQYFZVKOJSYN


In [None]:
# Geocode every (district, postal code) pair and collect one record per pair in `items`.
from geopy.extra.rate_limiter import RateLimiter  # local import: only this cell needs it

# create new dataframe additionally containing the latitude and longitude values of each district and postal code mapping
munich_data_ll = pd.DataFrame(columns=['District', 'Postal Code', 'Latitude', 'Longitude'])

# One geocoder for the whole loop (it was rebuilt on every iteration before), throttled to
# one request per second, which is the maximum allowed by the public Nominatim usage policy.
geolocator = Nominatim(user_agent="ny_explorer")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# loop over all entries of old data frame and store according values
items = []
# zip pairs the two columns by position, so the loop does not depend on the frame's index labels
for district, code in zip(munich_data_cleaned['District'], munich_data_cleaned['Postal Code']):
    address = district + ', ' + code # to get format of address

    location = geocode(address)
    # None means "no match" (or that the rate limiter gave up after its retries)
    if location is None:
        raise ValueError('Nominatim returned no result for address {!r}'.format(address))
    items.append({'District': district,
                  'Postal Code': code,
                  'Latitude': location.latitude,
                  'Longitude': location.longitude})

In [None]:
# Build the geocoded frame in one go from the records in `items`.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0; constructing the
# frame directly also makes this cell idempotent (re-running it no longer duplicates rows).
munich_data_ll = pd.DataFrame(items, columns=['District', 'Postal Code', 'Latitude', 'Longitude'])
munich_data_ll.head()

Unnamed: 0,District,Postal Code,Latitude,Longitude
0,Allach-Untermenzing,80995,48.195157,11.462973
1,Allach-Untermenzing,80997,48.195157,11.462973
2,Allach-Untermenzing,80999,48.195157,11.462973
3,Allach-Untermenzing,81247,48.195157,11.462973
4,Allach-Untermenzing,81249,48.195157,11.462973


In [None]:
# Base map centred on the coordinates stored in the first row of munich_data_ll.
first_row = munich_data_ll.iloc[0]
map_munich = folium.Map(location=[first_row["Latitude"], first_row["Longitude"]], zoom_start=11)

# Appearance shared by every marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False)

# One circle per row of munich_data_ll; its popup shows the district name.
for lat, lng, district in munich_data_ll[['Latitude', 'Longitude', 'District']].itertuples(index=False, name=None):
    folium.CircleMarker(
        [lat, lng],
        popup=folium.Popup('{}'.format(district), parse_html=True),
        **marker_style).add_to(map_munich)

map_munich

In [None]:
# function for getting all venues of munich
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Fetch the venues Foursquare recommends around each given location.

    Uses the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT settings and issues
    one "explore" request per (name, latitude, longitude) triple, printing the name
    before each request.

    Parameters
    ----------
    names : iterable
        Label stored in the 'District' column for every venue found at that location.
    latitudes, longitudes : iterable
        Coordinates of the search centres, aligned position by position with `names`.
    radius : int, default 500
        Value sent as the `radius` parameter of the explore request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'District', 'Latitude', 'Longitude',
        'Venue', 'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty, but still has these columns, when no venue is returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. invalid credentials, quota exceeded).
    requests.Timeout
        If a request takes longer than 30 seconds.
    """
    column_names = ['District',
                    'Latitude',
                    'Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string instead of formatting it by hand.
        # The timeout keeps one stalled connection from hanging the whole notebook.
        response = requests.get(
            endpoint,
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30)
        # Surface API errors here; otherwise they show up as a KeyError on 'groups' below.
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information of each nearby venue
        for v in results:
            venue = v['venue']
            # A venue may carry no category at all; record None instead of raising IndexError.
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing `columns` to the constructor keeps the schema even when venue_rows is empty
    # (assigning `.columns` afterwards raised a length-mismatch ValueError in that case).
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

In [None]:
# Query Foursquare once per row of munich_data_ll (one row per district / postal-code pair);
# getNearbyVenues prints the district name for every request it makes.
munich_venues = getNearbyVenues(names=munich_data_ll['District'],
                                   latitudes=munich_data_ll['Latitude'],
                                   longitudes=munich_data_ll['Longitude']
                                  )

Allach-Untermenzing
Allach-Untermenzing
Allach-Untermenzing
Allach-Untermenzing
Allach-Untermenzing
Altstadt-Lehel
Altstadt-Lehel
Altstadt-Lehel
Altstadt-Lehel
Altstadt-Lehel
Altstadt-Lehel
Altstadt-Lehel
Au-Haidhausen
Au-Haidhausen
Au-Haidhausen
Au-Haidhausen
Au-Haidhausen
Au-Haidhausen
Au-Haidhausen
Aubing-Lochhausen-Langwied
Aubing-Lochhausen-Langwied
Aubing-Lochhausen-Langwied
Berg am Laim
Berg am Laim
Berg am Laim
Berg am Laim
Bogenhausen
Bogenhausen
Bogenhausen
Bogenhausen
Bogenhausen
Bogenhausen
Feldmoching-Hasenbergl
Feldmoching-Hasenbergl
Feldmoching-Hasenbergl
Hadern
Hadern
Hadern
Laim
Laim
Laim
Ludwigsvorstadt-Isarvorstadt
Ludwigsvorstadt-Isarvorstadt
Ludwigsvorstadt-Isarvorstadt
Ludwigsvorstadt-Isarvorstadt
Maxvorstadt
Maxvorstadt
Maxvorstadt
Maxvorstadt
Maxvorstadt
Maxvorstadt
Maxvorstadt
Maxvorstadt
Maxvorstadt
Milbertshofen-Am Hart
Milbertshofen-Am Hart
Milbertshofen-Am Hart
Milbertshofen-Am Hart
Moosach
Moosach
Moosach
Moosach
Moosach
Neuhausen-Nymphenburg
Neuhausen-Nym

In [None]:
# Let's check the shape (rows, columns) of the venues dataframe: one row per returned venue
munich_venues.shape

(3564, 7)

In [None]:
# Let's look at the first rows of the venues dataframe
munich_venues.head()

Unnamed: 0,District,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Allach-Untermenzing,48.195157,11.462973,Bäckerei Schuhmair,48.197175,11.459016,Bakery
1,Allach-Untermenzing,48.195157,11.462973,dm-drogerie markt,48.194118,11.46564,Drugstore
2,Allach-Untermenzing,48.195157,11.462973,Sport Bittl,48.191447,11.466553,Sporting Goods Shop
3,Allach-Untermenzing,48.195157,11.462973,Sicilia,48.193331,11.459387,Italian Restaurant
4,Allach-Untermenzing,48.195157,11.462973,Lidl,48.194428,11.465612,Supermarket


In [None]:
# Number of venue rows per district (count() reports the non-null entries of every column)
munich_venues.groupby('District').count()

Unnamed: 0_level_0,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Allach-Untermenzing,40,40,40,40,40,40
Altstadt-Lehel,700,700,700,700,700,700
Au-Haidhausen,700,700,700,700,700,700
Berg am Laim,33,33,33,33,33,33
Bogenhausen,66,66,66,66,66,66
Feldmoching-Hasenbergl,6,6,6,6,6,6
Hadern,39,39,39,39,39,39
Laim,60,60,60,60,60,60
Ludwigsvorstadt-Isarvorstadt,168,168,168,168,168,168
Maxvorstadt,396,396,396,396,396,396


In [None]:
# Count the distinct venue categories found across all districts
print('There are {} uniques categories.'.format(len(munich_venues['Venue Category'].unique())))

There are 167 uniques categories.


In [None]:
# One-hot encode the venue categories: one row per venue, one indicator column per category.
munich_onehot = pd.get_dummies(munich_venues[['Venue Category']], prefix="", prefix_sep="")

# Add the District column from the SAME frame the indicators were built from.
# It was previously taken from munich_data_ll, which has one row per postal code rather
# than one per venue: insert() aligns on the index, so only the first few venue rows
# received a district (and an unrelated one), which corrupted every per-district frequency.
# NOTE: a district for which no venue was returned has no rows in this frame.
munich_onehot.insert(0, 'District', munich_venues['District'])
munich_onehot.head()

Unnamed: 0,District,ATM,American Restaurant,Arcade,Art Museum,Asian Restaurant,Athletics & Sports,Austrian Restaurant,Auto Dealership,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Bavarian Restaurant,Beach,Beer Garden,Big Box Store,Bistro,Bookstore,Boutique,Boxing Gym,Brewery,Burger Joint,Burrito Place,Bus Stop,Business Service,Café,Candy Store,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Deli / Bodega,Department Store,Design Studio,Diner,Discount Store,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish Market,Flower Shop,Food,Food & Drink Shop,Food Court,Fountain,French Restaurant,Furniture / Home Store,Gas Station,Gastropub,German Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grilled Meat Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hawaiian Restaurant,Hill,Himalayan Restaurant,Hookah Bar,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Insurance Office,Intersection,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Kebab Restaurant,Lake,Laser Tag,Laundry Service,Light Rail Station,Liquor Store,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Motel,Motorcycle Shop,Movie Theater,Museum,Nightclub,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Outdoor Sculpture,Paper / Office Supplies Store,Park,Pastry Shop,Peruvian Restaurant,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Post Office,Pub,Ramen Restaurant,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Shopping Plaza,Smoke Shop,Snack Place,Soccer Field,Soup Place,Spa,Spanish Restaurant,Sporting 
Goods Shop,Sports Bar,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Tapas Restaurant,Taverna,Thai Restaurant,Theater,Theme Restaurant,Tiki Bar,Tram Station,Trattoria/Osteria,Tunnel,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Water Park,Wine Bar,Wine Shop,Xinjiang Restaurant,Yoga Studio
0,Allach-Untermenzing,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Allach-Untermenzing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Allach-Untermenzing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Allach-Untermenzing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Allach-Untermenzing,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Average the indicator columns per district: each value becomes the share of the district's
# venue rows that belong to that category. 'District' is kept as a regular column.
munich_grouped = munich_onehot.groupby('District', as_index=False).mean()
munich_grouped.head(10)

Unnamed: 0,District,ATM,American Restaurant,Arcade,Art Museum,Asian Restaurant,Athletics & Sports,Austrian Restaurant,Auto Dealership,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Bavarian Restaurant,Beach,Beer Garden,Big Box Store,Bistro,Bookstore,Boutique,Boxing Gym,Brewery,Burger Joint,Burrito Place,Bus Stop,Business Service,Café,Candy Store,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Deli / Bodega,Department Store,Design Studio,Diner,Discount Store,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish Market,Flower Shop,Food,Food & Drink Shop,Food Court,Fountain,French Restaurant,Furniture / Home Store,Gas Station,Gastropub,German Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grilled Meat Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hawaiian Restaurant,Hill,Himalayan Restaurant,Hookah Bar,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Restaurant,Insurance Office,Intersection,Irish Pub,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Kebab Restaurant,Lake,Laser Tag,Laundry Service,Light Rail Station,Liquor Store,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Motel,Motorcycle Shop,Movie Theater,Museum,Nightclub,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Outdoor Sculpture,Paper / Office Supplies Store,Park,Pastry Shop,Peruvian Restaurant,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Post Office,Pub,Ramen Restaurant,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Shopping Plaza,Smoke Shop,Snack Place,Soccer Field,Soup Place,Spa,Spanish Restaurant,Sporting 
Goods Shop,Sports Bar,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Tapas Restaurant,Taverna,Thai Restaurant,Theater,Theme Restaurant,Tiki Bar,Tram Station,Trattoria/Osteria,Tunnel,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Water Park,Wine Bar,Wine Shop,Xinjiang Restaurant,Yoga Studio
0,Allach-Untermenzing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Altstadt-Lehel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Au-Haidhausen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Aubing-Lochhausen-Langwied,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Berg am Laim,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bogenhausen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Feldmoching-Hasenbergl,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Hadern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Laim,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Ludwigsvorstadt-Isarvorstadt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# (number of districts, 1 'District' column + one column per venue category)
munich_grouped.shape

(25, 168)

In [None]:
num_top_venues = 5

# Print, for every district, its most frequent venue categories (frequency rounded to 2 decimals).
for hood in munich_grouped['District']:
    print("----"+hood+"----")
    # the district's single row without the leading 'District' cell: a Series indexed by category
    freq_row = munich_grouped[munich_grouped['District'] == hood].iloc[0, 1:]
    temp = pd.DataFrame({'venue': freq_row.index, 'freq': freq_row.astype(float).values})
    temp = temp.round({'freq': 2})
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Allach-Untermenzing----
                 venue  freq
0   Italian Restaurant   0.2
1            Drugstore   0.2
2          Supermarket   0.2
3               Bakery   0.2
4  Sporting Goods Shop   0.2


----Altstadt-Lehel----
                venue  freq
0           Drugstore  0.29
1  Italian Restaurant  0.14
2     Automotive Shop  0.14
3         Supermarket  0.14
4              Bakery  0.14


----Au-Haidhausen----
                 venue  freq
0            Drugstore  0.29
1          Supermarket  0.29
2               Bakery  0.14
3      Automotive Shop  0.14
4  Sporting Goods Shop  0.14


----Aubing-Lochhausen-Langwied----
                venue  freq
0  Italian Restaurant  0.33
1         Supermarket  0.33
2           Drugstore  0.33
3                 ATM  0.00
4   Outdoor Sculpture  0.00


----Berg am Laim----
                           venue  freq
0                         Bakery  0.25
1                    Supermarket  0.25
2                      Drugstore  0.25
3                Automo

In [None]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of `row`, largest first.

    The first element of `row` (the District cell) is skipped; the remaining values are
    sorted in descending order and the index labels of the first `num_top_venues` of them
    are returned as a NumPy array (shorter if the row has fewer entries).
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [None]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in range(num_top_venues):
    # Ranks 1-3 take 'st' / 'nd' / 'rd', every later rank takes 'th'. The explicit bounds
    # check replaces a bare `except:` that used the IndexError as control flow and would
    # also have hidden any unrelated error raised inside the loop.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe: one row per district, filled with its top categories in rank order
district_venues_sorted = pd.DataFrame(columns=columns)
district_venues_sorted['District'] = munich_grouped['District']

for ind in range(munich_grouped.shape[0]):
    district_venues_sorted.iloc[ind, 1:] = return_most_common_venues(munich_grouped.iloc[ind, :], num_top_venues)

district_venues_sorted.head()

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allach-Untermenzing,Sporting Goods Shop,Italian Restaurant,Supermarket,Drugstore,Bakery,Falafel Restaurant,Food Court,Food & Drink Shop,Food,Flower Shop
1,Altstadt-Lehel,Drugstore,Sporting Goods Shop,Italian Restaurant,Supermarket,Automotive Shop,Bakery,Donut Shop,Dumpling Restaurant,Fountain,Food Court
2,Au-Haidhausen,Supermarket,Drugstore,Sporting Goods Shop,Automotive Shop,Bakery,Falafel Restaurant,Food Court,Food & Drink Shop,Food,Flower Shop
3,Aubing-Lochhausen-Langwied,Italian Restaurant,Supermarket,Drugstore,Yoga Studio,Falafel Restaurant,Food Court,Food & Drink Shop,Food,Flower Shop,Fish Market
4,Berg am Laim,Drugstore,Supermarket,Automotive Shop,Bakery,Yoga Studio,Farmers Market,Fountain,Food Court,Food & Drink Shop,Food


In [None]:
num_clusters = 5

# Feature matrix: the per-district category frequencies without the label column.
# `columns=` replaces the positional axis argument of `drop('District', 1)`, which was
# deprecated in pandas 1.3 and is rejected by pandas 2.x.
X = munich_grouped.drop(columns='District')

# random_state is fixed so that the cluster assignment is reproducible between runs
clustering = SpectralClustering(n_clusters=num_clusters,
        assign_labels='discretize',
        random_state=0).fit(X)

# display the fitted estimator and its parameters
clustering



SpectralClustering(affinity='rbf', assign_labels='discretize', coef0=1,
                   degree=3, eigen_solver=None, eigen_tol=0.0, gamma=1.0,
                   kernel_params=None, n_clusters=5, n_components=None,
                   n_init=10, n_jobs=None, n_neighbors=10, random_state=0)

In [None]:
# add clustering labels
# Drop a 'Cluster Labels' column left over from an earlier run of this cell first: insert()
# raises ValueError when the column already exists, so the cell used to fail when re-run.
district_venues_sorted = district_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
district_venues_sorted.insert(0, 'Cluster Labels', clustering.labels_)

# merge labels and data about venues to district data and latitude plus longitude data to have all in one dataframe
# (left join on 'District': every row of munich_data_ll is kept; a district that is absent
# from district_venues_sorted gets NaN in the added columns)
munich_merged = munich_data_ll.join(district_venues_sorted.set_index('District'), on='District')

munich_merged.head()


Unnamed: 0,District,Postal Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allach-Untermenzing,80995,48.195157,11.462973,0,Sporting Goods Shop,Italian Restaurant,Supermarket,Drugstore,Bakery,Falafel Restaurant,Food Court,Food & Drink Shop,Food,Flower Shop
1,Allach-Untermenzing,80997,48.195157,11.462973,0,Sporting Goods Shop,Italian Restaurant,Supermarket,Drugstore,Bakery,Falafel Restaurant,Food Court,Food & Drink Shop,Food,Flower Shop
2,Allach-Untermenzing,80999,48.195157,11.462973,0,Sporting Goods Shop,Italian Restaurant,Supermarket,Drugstore,Bakery,Falafel Restaurant,Food Court,Food & Drink Shop,Food,Flower Shop
3,Allach-Untermenzing,81247,48.195157,11.462973,0,Sporting Goods Shop,Italian Restaurant,Supermarket,Drugstore,Bakery,Falafel Restaurant,Food Court,Food & Drink Shop,Food,Flower Shop
4,Allach-Untermenzing,81249,48.195157,11.462973,0,Sporting Goods Shop,Italian Restaurant,Supermarket,Drugstore,Bakery,Falafel Restaurant,Food Court,Food & Drink Shop,Food,Flower Shop


In [None]:
# create map
map_clusters = folium.Map(location=[latitude_munich, longitude_munich], zoom_start=11)

# set color scheme for the clusters
indian_red = '#CD5C5C'
blue = '#2980B9'
purple = '#5B2C6F'
gold = '#F1C40F'
green = '#239B56'
rainbow = [indian_red, blue, purple, gold, green]
no_cluster_color = '#7F8C8D'  # grey, for rows that carry no cluster label

# add markers to the map
for lat, lon, poi, cluster in zip(munich_merged['Latitude'], munich_merged['Longitude'], munich_merged['District'], munich_merged['Cluster Labels']):
    if pd.isna(cluster):
        # The left join that built munich_merged leaves NaN for districts without venue
        # data; indexing `rainbow` with NaN would raise a TypeError.
        cluster_text = 'n/a'
        marker_color = no_cluster_color
    else:
        cluster = int(cluster)  # the label column is float when it contains NaN
        cluster_text = str(cluster)
        # Colour assignment is unchanged: label k uses rainbow[k - 1], so label 0 wraps
        # around to the last colour (green). The modulo makes that wrap-around explicit.
        marker_color = rainbow[(cluster - 1) % len(rainbow)]

    label = folium.Popup(str(poi) + ' Cluster ' + cluster_text, parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [None]:
# First, let's examine the green cluster (label 0): keep the postal code (column 1) and the
# most-common-venue columns (column 5 onwards) of the rows carrying that label.
venue_positions = [1] + list(range(5, munich_merged.shape[1]))
cluster0 = munich_merged[munich_merged['Cluster Labels'] == 0].iloc[:, venue_positions]
cluster0['1st Most Common Venue'].value_counts()

Drugstore              11
Supermarket             7
Sporting Goods Shop     5
Bakery                  3
Name: 1st Most Common Venue, dtype: int64

In [None]:
# Next, let's examine the indian red cluster (label 1): keep the postal code (column 1) and
# the most-common-venue columns (column 5 onwards) of the rows carrying that label.
venue_positions = [1] + list(range(5, munich_merged.shape[1]))
cluster1 = munich_merged[munich_merged['Cluster Labels'] == 1].iloc[:, venue_positions]
cluster1['1st Most Common Venue'].value_counts()

Plaza         16
Café           7
Restaurant     5
Name: 1st Most Common Venue, dtype: int64

In [None]:
# Next, let's examine the blue cluster (label 2): keep the postal code (column 1) and the
# most-common-venue columns (column 5 onwards) of the rows carrying that label.
venue_positions = [1] + list(range(5, munich_merged.shape[1]))
cluster2 = munich_merged[munich_merged['Cluster Labels'] == 2].iloc[:, venue_positions]
cluster2['1st Most Common Venue'].value_counts()

Department Store      9
Ice Cream Shop        8
Falafel Restaurant    7
Pizza Place           6
Church                4
Bookstore             4
Clothing Store        4
Strip Club            3
Name: 1st Most Common Venue, dtype: int64

In [None]:
# Now let's examine the purple cluster (label 3): keep the postal code (column 1) and the
# most-common-venue columns (column 5 onwards) of the rows carrying that label.
venue_positions = [1] + list(range(5, munich_merged.shape[1]))
cluster3 = munich_merged[munich_merged['Cluster Labels'] == 3].iloc[:, venue_positions]
cluster3['1st Most Common Venue'].value_counts()

Department Store       5
Sporting Goods Shop    5
Tiki Bar               4
Café                   2
Name: 1st Most Common Venue, dtype: int64

In [None]:
# Now let's examine the yellow cluster (label 4): keep the postal code (column 1) and the
# most-common-venue columns (column 5 onwards) of the rows carrying that label.
venue_positions = [1] + list(range(5, munich_merged.shape[1]))
cluster4 = munich_merged[munich_merged['Cluster Labels'] == 4].iloc[:, venue_positions]
cluster4['1st Most Common Venue'].value_counts()

Supermarket           6
Italian Restaurant    6
Name: 1st Most Common Venue, dtype: int64