In [100]:
#!pip install html5lib
import os
import sys
from io import StringIO

import html5lib
import pandas as pd
import pgeocode
import requests

# 1. Scraping the data and preprocessing 

In [101]:
# Fetch the postal-code listing page for the Darmstadt area (postalpinzipcodes.com).
# The timeout keeps the cell from hanging indefinitely, and raise_for_status() stops
# the notebook here on an HTTP error instead of parsing an error page further down.
res = requests.get(
    'https://www.postalpinzipcodes.com/Post-Office-Deu-Germany-Darmstadt-Postal-Code-64297-Zip-Code',
    timeout=30,
)
res.raise_for_status()

In [102]:
# Parse every <table> of the page into a list of DataFrames; the table that is
# processed below is the second one (index 1).
# StringIO: passing literal HTML text to read_html is deprecated in recent pandas.
tables = pd.read_html(StringIO(res.text))
df = tables[1]

In [103]:
# Drop the rows labelled 0 and 1 and the column labelled 2, then name the two
# remaining columns.
listing = df.drop(index=[0, 1]).reset_index(drop=True).drop(columns=2)
listing.columns = ['Name', 'Postal_Code']

# Keep only the third whitespace-separated token of each cell.
# The result is built as a fresh frame (instead of merging the split columns back and
# slicing them out again), so the assignments made to df in later cells do not act on
# a column slice of another frame, which can trigger SettingWithCopyWarning.
df = pd.DataFrame({
    'Neighbourhood': listing['Name'].str.split(expand=True)[2],
    'Postal_Code': listing['Postal_Code'].str.split(expand=True)[2],
})
df

Unnamed: 0,Neighbourhood,Postal_Code
0,Darmstadt,64297
1,Pfungstadt,64319
2,Darmstadt,64285
3,Mühltal,64367
4,Darmstadt,64295
5,Darmstadt,64287
6,Darmstadt,64283
7,Seeheim-Jugenheim,64342
8,Darmstadt,64293
9,Ober-Ramstadt,64372


In [104]:
#Rename Darmstadt with different zip codes as Darmstadt-1, Darmstadt-2 and so on
# Vectorised: the cumulative count over the match mask numbers the matching rows
# 1, 2, 3, ... in order of appearance; rows with any other name are left untouched.
is_darmstadt = df['Neighbourhood'] == 'Darmstadt'
occurrence = is_darmstadt.cumsum()[is_darmstadt].astype(str)
df.loc[is_darmstadt, 'Neighbourhood'] = 'Darmstadt-' + occurrence

In [105]:
df

Unnamed: 0,Neighbourhood,Postal_Code
0,Darmstadt-1,64297
1,Pfungstadt,64319
2,Darmstadt-2,64285
3,Mühltal,64367
4,Darmstadt-3,64295
5,Darmstadt-4,64287
6,Darmstadt-5,64283
7,Seeheim-Jugenheim,64342
8,Darmstadt-6,64293
9,Ober-Ramstadt,64372


In [106]:
# pgeocode lookup object for German ('de') postal codes; used below to turn each
# postal code into a latitude/longitude pair
nm = pgeocode.Nominatim(country='de')

In [107]:
# Sanity check: look up a single postal code to see which fields a query returns
nm.query_postal_code(64289)

postal_code                               64289
country code                                 DE
place_name                            Darmstadt
state_name                               Hessen
state_code                                   HE
county_name          Regierungsbezirk Darmstadt
county_code                                  64
community_name    Darmstadt, Wissenschaftsstadt
community_code                             6411
latitude                                49.8972
longitude                                8.6809
accuracy                                      4
Name: 0, dtype: object

In [108]:
# Look up the coordinates of every postal code, in the row order of df.
# The lookup result gets its own name so that `res` (the HTTP response fetched in the
# scraping cell) is not overwritten and that cell can still be re-run afterwards.
lat = []
lon = []
for code in df['Postal_Code']:
    location = nm.query_postal_code(code)
    lat.append(location.latitude)
    lon.append(location.longitude)
    

In [109]:
# Add the coordinates as new columns. `lat` and `lon` are the lists filled by the
# lookup loop, one entry per row of df in row order.
df['Latitude'] = lat
df['Longitude'] = lon
df

Unnamed: 0,Neighbourhood,Postal_Code,Latitude,Longitude
0,Darmstadt-1,64297,49.8192,8.6449
1,Pfungstadt,64319,49.8056,8.6031
2,Darmstadt-2,64285,49.8591,8.6486
3,Mühltal,64367,49.8156,8.7081
4,Darmstadt-3,64295,49.8611,8.6373
5,Darmstadt-4,64287,49.8676,8.6644
6,Darmstadt-5,64283,49.8719,8.6484
7,Seeheim-Jugenheim,64342,49.765,8.6519
8,Darmstadt-6,64293,49.8875,8.6446
9,Ober-Ramstadt,64372,49.8308,8.7489


In [110]:
import folium

In [111]:
# create map of the Darmstadt area using latitude and longitude values

map_darmstadt = folium.Map(location=[49.8875, 8.6446], zoom_start=10)

# add markers to map
# The loop variables are deliberately not called `lat`: that name holds the list of
# latitudes built by the geocoding loop, and rebinding it to a single float here
# would corrupt a re-run of the cell that assigns df['Latitude'] = lat.
for hood_lat, hood_lng, name in zip(df['Latitude'], df['Longitude'], df['Neighbourhood']):
    folium.CircleMarker(
        [hood_lat, hood_lng],
        radius=5,
        popup=folium.Popup(name, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_darmstadt)

map_darmstadt

In [112]:
# Foursquare credentials are read from the environment so that they are never stored
# in (or committed with) the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded in this cell has been exposed
# and should be regenerated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') #Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') #Foursquare Secret
VERSION = '20200707' # Foursquare API version

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'the Foursquare requests below will be rejected.')

# Report only whether each credential is present; the values themselves must not end
# up in the saved cell output.
print('Your credentails:')
print('CLIENT_ID: ' + ('<set>' if CLIENT_ID else '<missing>'))
print('CLIENT_SECRET:' + ('<set>' if CLIENT_SECRET else '<missing>'))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [113]:
# Use the first row of df as the example neighbourhood for a single Foursquare query
first_row = 0
neighborhood_name = df.at[first_row, 'Neighbourhood']
neighborhood_latitude = df.at[first_row, 'Latitude']
neighborhood_longitude = df.at[first_row, 'Longitude']

message = 'Latitude and longitude values of {} are {}, {}.'
print(message.format(neighborhood_name, neighborhood_latitude, neighborhood_longitude))

Latitude and longitude values of Darmstadt-1 are 49.8192, 8.6449.


In [114]:
# Search parameters for the single-neighbourhood Foursquare "explore" request below
radius = 500  # sent as the `radius` query parameter (presumably metres; confirm against the API docs)
LIMIT = 100  # sent as the `limit` query parameter; also read by getNearbyVenues

In [115]:
# Assemble the Foursquare "explore" request URL for the example neighbourhood
explore_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
query_values = (
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    neighborhood_latitude,
    neighborhood_longitude,
    radius,
    LIMIT,
)
url = explore_template.format(*query_values)

In [116]:
# Run the explore request for the example neighbourhood.
# The timeout keeps the cell from hanging, and raise_for_status() surfaces a rejected
# request (bad credentials, quota, ...) here rather than as a KeyError in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5f253da78b9c0f54261f0407'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Current map view',
  'headerFullLocation': 'Current map view',
  'headerLocationGranularity': 'unknown',
  'totalResults': 6,
  'suggestedBounds': {'ne': {'lat': 49.823700004500004,
    'lng': 8.651861547791714},
   'sw': {'lat': 49.8146999955, 'lng': 8.637938452208285}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b4c91acf964a52063b626e3',
       'name': 'Radieschen',
       'location': {'address': 'Reuterallee 37',
        'lat': 49.820171239838814,
        'lng': 8.639814456523967,
        'labeledLatLngs': [{'label': 'display',
          'lat': 49.82017123983881

In [168]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or a dict)
        Venue record holding the list of category dicts either under the key
        'categories' or, if that key is absent, under 'venue.categories'.

    Returns
    -------
    The 'name' entry of the first category, or None when the venue has no
    categories (empty list or None).

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    # Only a missing key selects the fallback column; any other error is a real
    # problem and must not be swallowed the way a bare `except:` would.
    try:
        categories_list = row['categories']
    except KeyError:
        categories_list = row['venue.categories']

    if categories_list is None or len(categories_list) == 0:
        return None

    #return categories_list[0]['shortName']
    return categories_list[0]['name']

In [169]:
# NOTE(review): this import cell sits in the middle of the notebook; earlier cells
# already rely on pandas and requests being imported in the first cell.
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# show every column and every row when a DataFrame is displayed
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
#from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): pandas.io.json.json_normalize is deprecated since pandas 1.0 in favour of pandas.json_normalize
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
#import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [170]:
# venue items of the first result group of the explore response
venues = results['response']['groups'][0]['items']

# flatten JSON into one column per nested key ('venue.name', 'venue.location.lat', ...);
# pd.json_normalize replaces the deprecated pandas.io.json.json_normalize (pandas >= 1.0)
nearby_venues = pd.json_normalize(venues)

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# filter the category for each row: replace the list of category dicts by the first category name
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last dotted component ('venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Radieschen,Vegetarian / Vegan Restaurant,49.820171,8.639814
1,Bäckerei Hofmann,Bakery,49.819703,8.644215
2,Bella Sardegna,Italian Restaurant,49.820855,8.644312
3,H Wartehalle,Bus Stop,49.820838,8.644256
4,REWE,Supermarket,49.816089,8.644629


In [120]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint once per neighbourhood and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Neighbourhood names and the coordinates to search around; they are
        consumed in parallel with zip().
    radius : int, default 500
        Value sent as the `radius` query parameter of each request.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty (but has these columns) when no venue is found. 'Venue Category'
        is None for a venue that carries no category.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers a request with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and
    prints each neighbourhood name as a progress indicator.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # make the GET request; `params` lets requests build and escape the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()

        # a response without any result group simply contributes no venues
        groups = response.json()['response'].get('groups') or [{}]
        items = groups[0].get('items', [])

        # return only relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # some venues have an empty category list; indexing [0] would raise IndexError
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing `columns` here keeps the schema even when `rows` is empty; assigning
    # seven column names to an empty frame afterwards would raise a length mismatch
    return pd.DataFrame(rows, columns=columns)

In [121]:
# Collect the nearby venues for every neighbourhood listed in df
tor_venues = getNearbyVenues(
    names=df['Neighbourhood'],
    latitudes=df['Latitude'],
    longitudes=df['Longitude'],
)


Darmstadt-1
Pfungstadt
Darmstadt-2
Mühltal
Darmstadt-3
Darmstadt-4
Darmstadt-5
Seeheim-Jugenheim
Darmstadt-6
Ober-Ramstadt
Griesheim
Modautal
Bickenbach
Roßdorf
Darmstadt-7
Alsbach-Hähnlein
Weiterstadt
Darmstadt-8
Zwingenberg
Lautertal


In [173]:
# Keep only the venues whose category contains the whole word "Restaurant".
# The pattern is a raw string: '\W' inside a normal string literal is an invalid
# escape sequence, which newer Python versions warn about.
restaurant_pattern = r'(?:^|\W)Restaurant(?:$|\W)'
tor_venues = tor_venues[tor_venues['Venue Category'].str.count(restaurant_pattern) > 0]
print(tor_venues.shape)

(42, 7)


Only restaurants are of interest for this analysis, so all other venue categories were filtered out above.

In [174]:
#check number of venues in each neighbourhood, largest first
venues_per_neighbourhood = tor_venues.groupby('Neighbourhood')['Venue'].count()
venues_per_neighbourhood.sort_values(ascending=False)

Neighbourhood
Darmstadt-5          20
Darmstadt-4           4
Darmstadt-2           4
Weiterstadt           3
Seeheim-Jugenheim     2
Darmstadt-8           2
Darmstadt-1           2
Pfungstadt            1
Ober-Ramstadt         1
Griesheim             1
Darmstadt-6           1
Darmstadt-3           1
Name: Venue, dtype: int64

In [175]:
# number of distinct venue categories left after the restaurant filter
category_count = len(tor_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 22 uniques categories.


In [242]:
# number of venues per category, most frequent category first
venues_per_category = tor_venues.groupby('Venue Category')['Venue'].count()
venues_per_category.sort_values(ascending=False)

Venue Category
German Restaurant                6
Italian Restaurant               4
Sushi Restaurant                 4
Asian Restaurant                 3
Turkish Restaurant               3
Falafel Restaurant               2
French Restaurant                2
Greek Restaurant                 2
Vegetarian / Vegan Restaurant    2
Middle Eastern Restaurant        2
Indian Restaurant                1
Mediterranean Restaurant         1
Mexican Restaurant               1
Fast Food Restaurant             1
Restaurant                       1
Eastern European Restaurant      1
Doner Restaurant                 1
Cuban Restaurant                 1
Comfort Food Restaurant          1
Australian Restaurant            1
Tapas Restaurant                 1
American Restaurant              1
Name: Venue, dtype: int64

In [243]:
# one hot encoding: one indicator column per venue category, named after the category
tor_onehot = pd.get_dummies(tor_venues[['Venue Category']], prefix="", prefix_sep="")

# put the neighbourhood of each venue in front of the indicator columns
tor_onehot.insert(0, 'Neighbourhood', tor_venues['Neighbourhood'])

tor_onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Asian Restaurant,Australian Restaurant,Comfort Food Restaurant,Cuban Restaurant,Doner Restaurant,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,French Restaurant,German Restaurant,Greek Restaurant,Indian Restaurant,Italian Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Restaurant,Sushi Restaurant,Tapas Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant
0,Darmstadt-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,Darmstadt-1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
10,Pfungstadt,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
17,Darmstadt-2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
18,Darmstadt-2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [244]:
tor_onehot.shape

(42, 23)

In [245]:
# Average the indicator columns per neighbourhood: each value becomes the share of
# that neighbourhood's venues falling into the category
tor_grouped = tor_onehot.groupby('Neighbourhood', as_index=False).mean()
tor_grouped

Unnamed: 0,Neighbourhood,American Restaurant,Asian Restaurant,Australian Restaurant,Comfort Food Restaurant,Cuban Restaurant,Doner Restaurant,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,French Restaurant,German Restaurant,Greek Restaurant,Indian Restaurant,Italian Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Restaurant,Sushi Restaurant,Tapas Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant
0,Darmstadt-1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5
1,Darmstadt-2,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Darmstadt-3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Darmstadt-4,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0
4,Darmstadt-5,0.05,0.1,0.05,0.0,0.0,0.0,0.0,0.05,0.0,0.05,0.05,0.05,0.0,0.1,0.0,0.05,0.1,0.0,0.15,0.0,0.15,0.05
5,Darmstadt-6,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Darmstadt-8,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Griesheim,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Ober-Ramstadt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Pfungstadt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [246]:
# Find out the most common venue categories for each neighbourhood
# number of categories listed per neighbourhood (also read by later cells)
num_top_venues = 5

for hood in tor_grouped['Neighbourhood']:
    print("----"+hood+"----")
    # turn the neighbourhood's single row into a two-column table: category name / frequency
    temp = tor_grouped[tor_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # the first row of the transposed table is the 'Neighbourhood' label itself, not a category
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    # NOTE(review): categories with equal frequency (including 0.0) appear in whatever
    # order the sort leaves them
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Darmstadt-1----
                           venue  freq
0  Vegetarian / Vegan Restaurant   0.5
1             Italian Restaurant   0.5
2               Asian Restaurant   0.0
3             Turkish Restaurant   0.0
4               Tapas Restaurant   0.0


----Darmstadt-2----
                 venue  freq
0    German Restaurant  0.50
1     Cuban Restaurant  0.25
2    French Restaurant  0.25
3  American Restaurant  0.00
4   Italian Restaurant  0.00


----Darmstadt-3----
                 venue  freq
0     Asian Restaurant   1.0
1  American Restaurant   0.0
2   Turkish Restaurant   0.0
3     Tapas Restaurant   0.0
4     Sushi Restaurant   0.0


----Darmstadt-4----
                     venue  freq
0        Indian Restaurant  0.25
1  Comfort Food Restaurant  0.25
2         Tapas Restaurant  0.25
3         Sushi Restaurant  0.25
4      American Restaurant  0.00


----Darmstadt-5----
                       venue  freq
0         Turkish Restaurant  0.15
1           Sushi Restaurant  0.15
2  Midd

In [247]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of `row`, skipping its first entry.

    `row` is a pandas Series whose first element is not a frequency (the callers
    in this notebook pass a row of tor_grouped, which starts with the
    neighbourhood name). The remaining entries are ranked in descending order
    and the index labels of the first `num_top_venues` of them are returned as
    an array; fewer are returned if the row has fewer entries.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

In [249]:
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    # ranks 1-3 take their own ordinal suffix, every later rank falls back to 'th'
    # (explicit bounds check instead of catching whatever indicators[ind] might raise)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighbourhood of tor_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = tor_grouped['Neighbourhood']

# fill the ranking columns row by row from the per-neighbourhood category frequencies
for ind in np.arange(tor_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(tor_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Darmstadt-1,Vegetarian / Vegan Restaurant,Italian Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
1,Darmstadt-2,German Restaurant,Cuban Restaurant,French Restaurant,Turkish Restaurant,Asian Restaurant
2,Darmstadt-3,Asian Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Australian Restaurant,Comfort Food Restaurant
3,Darmstadt-4,Tapas Restaurant,Sushi Restaurant,Comfort Food Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant
4,Darmstadt-5,Sushi Restaurant,Turkish Restaurant,Asian Restaurant,Middle Eastern Restaurant,Italian Restaurant
5,Darmstadt-6,Eastern European Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant
6,Darmstadt-8,Doner Restaurant,Italian Restaurant,Vegetarian / Vegan Restaurant,French Restaurant,Asian Restaurant
7,Griesheim,Falafel Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant
8,Ober-Ramstadt,German Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant,Comfort Food Restaurant
9,Pfungstadt,Fast Food Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant


In [250]:
# set number of clusters
kclusters = 5

# feature matrix = category frequencies only.
# `columns=` keyword: the positional axis argument of DataFrame.drop (`drop(label, 1)`)
# is no longer accepted by pandas 2.
tor_grouped_clustering = tor_grouped.drop(columns='Neighbourhood')

#run k-means clustering
# n_init is pinned to 10, the long-standing scikit-learn default this call relied on,
# so the result does not shift with releases that changed that default.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(tor_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([0, 1, 0, 0, 0, 4, 0, 2, 1, 3, 1, 0])

In [251]:
# add clustering labels
# A label column left over from a previous run is dropped first so this cell can be
# re-run: DataFrame.insert raises if the column already exists.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach label and top venues to every neighbourhood of df; neighbourhoods that have no
# restaurant are missing from neighborhoods_venues_sorted and come out as NaN
tor_merged = df.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

## assign an additional cluster to those that don't have restaurants at all
## (label == kclusters, i.e. 5 with the current setting)
# Assigned back as a column: fillna(..., inplace=True) on a selected column is a chained
# assignment that recent pandas versions no longer propagate to the frame.
tor_merged['Cluster Labels'] = tor_merged['Cluster Labels'].fillna(kclusters).astype(int)
tor_merged = tor_merged.reset_index(drop=True)
tor_merged # check the last columns!

Unnamed: 0,Neighbourhood,Postal_Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Darmstadt-1,64297,49.8192,8.6449,0,Vegetarian / Vegan Restaurant,Italian Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
1,Pfungstadt,64319,49.8056,8.6031,3,Fast Food Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant
2,Darmstadt-2,64285,49.8591,8.6486,1,German Restaurant,Cuban Restaurant,French Restaurant,Turkish Restaurant,Asian Restaurant
3,Mühltal,64367,49.8156,8.7081,5,,,,,
4,Darmstadt-3,64295,49.8611,8.6373,0,Asian Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Australian Restaurant,Comfort Food Restaurant
5,Darmstadt-4,64287,49.8676,8.6644,0,Tapas Restaurant,Sushi Restaurant,Comfort Food Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant
6,Darmstadt-5,64283,49.8719,8.6484,0,Sushi Restaurant,Turkish Restaurant,Asian Restaurant,Middle Eastern Restaurant,Italian Restaurant
7,Seeheim-Jugenheim,64342,49.765,8.6519,1,German Restaurant,Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
8,Darmstadt-6,64293,49.8875,8.6446,4,Eastern European Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant
9,Ober-Ramstadt,64372,49.8308,8.7489,1,German Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant,Comfort Food Restaurant


In [221]:
tor_merged

Unnamed: 0,Neighbourhood,Postal_Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Darmstadt-1,64297,49.8192,8.6449,0,Vegetarian / Vegan Restaurant,Italian Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
1,Pfungstadt,64319,49.8056,8.6031,3,Fast Food Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant
2,Darmstadt-2,64285,49.8591,8.6486,1,German Restaurant,Cuban Restaurant,French Restaurant,Turkish Restaurant,Asian Restaurant
3,Mühltal,64367,49.8156,8.7081,5,,,,,
4,Darmstadt-3,64295,49.8611,8.6373,0,Asian Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Australian Restaurant,Comfort Food Restaurant
5,Darmstadt-4,64287,49.8676,8.6644,0,Tapas Restaurant,Sushi Restaurant,Comfort Food Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant
6,Darmstadt-5,64283,49.8719,8.6484,0,Sushi Restaurant,Turkish Restaurant,Asian Restaurant,Middle Eastern Restaurant,Italian Restaurant
7,Seeheim-Jugenheim,64342,49.765,8.6519,1,German Restaurant,Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
8,Darmstadt-6,64293,49.8875,8.6446,4,Eastern European Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant
9,Ober-Ramstadt,64372,49.8308,8.7489,1,German Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant,Comfort Food Restaurant


In [229]:
# create map
map_clusters = folium.Map(location=[49.8875, 8.6446], zoom_start=10)

# set color scheme for the clusters: kclusters k-means labels plus the extra label
# given to neighbourhoods without restaurants -> kclusters + 1 evenly spaced colours
colors_array = cm.rainbow(np.linspace(0, 1, kclusters + 1))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
# The loop variables are deliberately not called `lat` / `lon`: those names hold the
# coordinate lists built by the geocoding loop and must survive for a re-run of the
# cell that assigns them to df.
for hood_lat, hood_lon, poi, cluster in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighbourhood'], tor_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 wraps label 0 around to the last colour; every label still maps to
    # its own colour
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [hood_lat, hood_lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [252]:
tor_merged

Unnamed: 0,Neighbourhood,Postal_Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Darmstadt-1,64297,49.8192,8.6449,0,Vegetarian / Vegan Restaurant,Italian Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
1,Pfungstadt,64319,49.8056,8.6031,3,Fast Food Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant
2,Darmstadt-2,64285,49.8591,8.6486,1,German Restaurant,Cuban Restaurant,French Restaurant,Turkish Restaurant,Asian Restaurant
3,Mühltal,64367,49.8156,8.7081,5,,,,,
4,Darmstadt-3,64295,49.8611,8.6373,0,Asian Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Australian Restaurant,Comfort Food Restaurant
5,Darmstadt-4,64287,49.8676,8.6644,0,Tapas Restaurant,Sushi Restaurant,Comfort Food Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant
6,Darmstadt-5,64283,49.8719,8.6484,0,Sushi Restaurant,Turkish Restaurant,Asian Restaurant,Middle Eastern Restaurant,Italian Restaurant
7,Seeheim-Jugenheim,64342,49.765,8.6519,1,German Restaurant,Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
8,Darmstadt-6,64293,49.8875,8.6446,4,Eastern European Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant
9,Ober-Ramstadt,64372,49.8308,8.7489,1,German Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant,Comfort Food Restaurant


In [269]:
tor_merged[tor_merged.columns[[0,4,5,6,7,8,9]]].sort_values('Cluster Labels')

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Darmstadt-1,0,Vegetarian / Vegan Restaurant,Italian Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
17,Darmstadt-8,0,Doner Restaurant,Italian Restaurant,Vegetarian / Vegan Restaurant,French Restaurant,Asian Restaurant
16,Weiterstadt,0,German Restaurant,Mediterranean Restaurant,Greek Restaurant,French Restaurant,Asian Restaurant
4,Darmstadt-3,0,Asian Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Australian Restaurant,Comfort Food Restaurant
5,Darmstadt-4,0,Tapas Restaurant,Sushi Restaurant,Comfort Food Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant
6,Darmstadt-5,0,Sushi Restaurant,Turkish Restaurant,Asian Restaurant,Middle Eastern Restaurant,Italian Restaurant
7,Seeheim-Jugenheim,1,German Restaurant,Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
9,Ober-Ramstadt,1,German Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant,Comfort Food Restaurant
2,Darmstadt-2,1,German Restaurant,Cuban Restaurant,French Restaurant,Turkish Restaurant,Asian Restaurant
10,Griesheim,2,Falafel Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant


In [253]:
tor_merged.loc[tor_merged['Cluster Labels'] == 0, tor_merged.columns[[0] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Darmstadt-1,Vegetarian / Vegan Restaurant,Italian Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
4,Darmstadt-3,Asian Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Australian Restaurant,Comfort Food Restaurant
5,Darmstadt-4,Tapas Restaurant,Sushi Restaurant,Comfort Food Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant
6,Darmstadt-5,Sushi Restaurant,Turkish Restaurant,Asian Restaurant,Middle Eastern Restaurant,Italian Restaurant
16,Weiterstadt,German Restaurant,Mediterranean Restaurant,Greek Restaurant,French Restaurant,Asian Restaurant
17,Darmstadt-8,Doner Restaurant,Italian Restaurant,Vegetarian / Vegan Restaurant,French Restaurant,Asian Restaurant


In [254]:
tor_merged.loc[tor_merged['Cluster Labels'] == 1, tor_merged.columns[[0] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Darmstadt-2,German Restaurant,Cuban Restaurant,French Restaurant,Turkish Restaurant,Asian Restaurant
7,Seeheim-Jugenheim,German Restaurant,Restaurant,French Restaurant,Asian Restaurant,Australian Restaurant
9,Ober-Ramstadt,German Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant,Comfort Food Restaurant


In [255]:
tor_merged.loc[tor_merged['Cluster Labels'] == 2, tor_merged.columns[[0] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
10,Griesheim,Falafel Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant


In [256]:
tor_merged.loc[tor_merged['Cluster Labels'] == 3, tor_merged.columns[[0] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Pfungstadt,Fast Food Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant


In [257]:
tor_merged.loc[tor_merged['Cluster Labels'] == 4, tor_merged.columns[[0] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
8,Darmstadt-6,Eastern European Restaurant,Vegetarian / Vegan Restaurant,Turkish Restaurant,Asian Restaurant,Australian Restaurant


In [258]:
tor_merged.loc[tor_merged['Cluster Labels'] == 5, tor_merged.columns[[0] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
3,Mühltal,,,,,
11,Modautal,,,,,
12,Bickenbach,,,,,
13,Roßdorf,,,,,
14,Darmstadt-7,,,,,
15,Alsbach-Hähnlein,,,,,
18,Zwingenberg,,,,,
19,Lautertal,,,,,


# The END