### Import libraries

In [1]:
#!conda install -c conda-forge geopy --yes 
#!conda install -c conda-forge folium=0.5.0 --yes

In [2]:
#!conda install -c conda-forge geocoder --yes

In [3]:
import geocoder
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import json
import urllib

### Prepare location data

Prepare empty data frame with the following column: 'PostalCode', 'Streets'. Check if the empty dataframe is correct

In [4]:
columns=("Postal Code", "Streets")
df = pd.DataFrame(columns=columns)
df

Unnamed: 0,Postal Code,Streets


Retrieve list of postal codes with street names. For this purporse the following website will be used: <a href="https://trojmiasto.onet.pl/kod-pocztowy-gdansk-lista-kodow-pocztowych-artykul/7ftd6ws">https://trojmiasto.onet.pl/kod-pocztowy-gdansk-lista-kodow-pocztowych-artykul/7ftd6ws</a>

Data are present within the following tag:

&lt;p class="hyphenate " data-text-len="27" data-scroll="paragraph_1520"&gt;80-176;Gdańsk;Sympatyczna;&nbsp;&lt;/p&gt;

Inside the tag they are presented as:

&lt;Postal Code&gt;&lt;City name&gt;&lt;Street name&gt;

Note, that for one postal code there may be several street names. In this case these  rows will be combined into one row with the streets separated with a semicolon

In [5]:
#postal_codes='https://worldpostalcode.com/poland/pomerania/gdansk'
postal_codes_link = 'https://trojmiasto.onet.pl/kod-pocztowy-gdansk-lista-kodow-pocztowych-artykul/7ftd6ws'
r = requests.get(postal_codes_link)
soup = BeautifulSoup(r.text, 'html.parser')

In [6]:
postalCodesWithTag= soup.findAll("p", {"class": "hyphenate"})
index = 0
for postalCodeWithTag in postalCodesWithTag:
    postalCodeWithStreet = postalCodeWithTag.find(text=True).strip()
    if postalCodeWithStreet.startswith("80-"): # ignore rows not related to postal codes
        postalCode=postalCodeWithStreet[:6]
        streets=postalCodeWithStreet[14:]
        
        row = df.loc[df['Postal Code'] == postalCode]
        if row.empty:             
            df.loc[index] = [postalCode, streets]
            index = index + 1
        else:
            df.loc[df['Postal Code'] == postalCode, 'Streets'] = df.loc[df['Postal Code'] == postalCode, 'Streets'] + streets
print("Postal codes loaded!")

Postal codes loaded!


Check the dataframe

In [7]:
df.head()

Unnamed: 0,Postal Code,Streets
0,80-180,11 Listopada;Aleksandra Dulin'a;Alfonsa Flisyk...
1,80-041,3 Brygady Szczerbca;Alojzego Bruskiego;Antonie...
2,80-802,3 Maja;gen. Henryka Dąbrowskiego;1-4Gradowa;
3,80-299,Achillesa;Afrodyty;Akteona;Andromedy;Antygony;...
4,80-316,Adama Asnyka;Kaprów;


In [8]:
#!wget --quiet https://geocode.xyz/Gdansk&auth=162241483315015829435x1514&?json=1 -O gdansk_loc.json
    
print('GeoJSON file downloaded!')

GeoJSON file downloaded!


In [9]:
json_data=open('gdansk_loc.json').read()
geo_data_values= json.loads(json_data)
print('JSON data loaded!')

JSON data loaded!


In [10]:
print(requests.get("https://geocode.xyz/80-802&auth=162241483315015829435x1514&?region=PL&json=1").json())

{'success': False, 'error': {'code': '006', 'message': 'Request Throttled.'}}


Load the geographical coordinates of each postal code

In [11]:
gdansk_loc = [float(geo_data_values["alt"]["loc"][0]["latt"]), float(geo_data_values["alt"]["loc"][0]["longt"])]
# https://geocode.xyz/80-802?region=PL&json=1
def getLocation(postalCode):
    for geo in geo_data_values["alt"]["loc"]:
        if geo["postal"] == postalCode:            
            return {'Postal Code' : postalCode, 'Longitude': geo["longt"], 'Latitude':geo["latt"]}
    return {'Postal Code' : postalCode, 'Longitude':np.nan, 'Latitude':np.nan}

def createLocationDF(postalCodes):
    locations = []    
    for postalCode in postalCodes:
        #print(postalCode)
        locations.append(getLocation(postalCode))
        
    return pd.DataFrame(locations)

In [12]:
locations = createLocationDF(postalCodes=df['Postal Code'])
locations['Latitude'] = locations['Latitude'].astype('float')
locations['Longitude'] = locations['Longitude'].astype('float')
locations.head()

Unnamed: 0,Latitude,Longitude,Postal Code
0,54.31644,18.58729,80-180
1,54.32466,18.61394,80-041
2,,,80-802
3,,,80-299
4,,,80-316


In [13]:
df_merged = pd.merge(df, locations)
df_merged.dropna(inplace=True)
df_merged.head()

Unnamed: 0,Postal Code,Streets,Latitude,Longitude
0,80-180,11 Listopada;Aleksandra Dulin'a;Alfonsa Flisyk...,54.31644,18.58729
1,80-041,3 Brygady Szczerbca;Alojzego Bruskiego;Antonie...,54.32466,18.61394
39,80-210,al. Zwycięstwa;41-46Józefa Hoene-Wrońskiego;Ju...,54.36629,18.63048
40,80-207,al. Zwycięstwa;49-59,54.36409,18.63694
50,80-172,Aleksandra Volty;Alfreda Nobla;Blaise'a Pascal...,54.35121,18.59498


In [14]:
df_merged.shape

(99, 4)

Display map

In [15]:
gdansk_map = folium.Map(location=gdansk_loc, zoom_start=11)

postal_code_markers = folium.map.FeatureGroup()

for lat, lng, in zip(df_merged.Latitude, df_merged.Longitude):
    postal_code_markers.add_child(
        folium.features.CircleMarker(
            [lat, lng],
            radius=5, # define how big you want the circle markers to be
            color='yellow',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6
        )
    )
gdansk_map.add_child(postal_code_markers)
# display map
gdansk_map

### Retrieve the most popular venues for each neighborhood

Prepare Foursquare calls

In [16]:
CLIENT_ID = 'BYYI2T0OESVKZWMHAALNG1HRUKK1GP4IA1XSDTXHX0TBUWBJ' # your Foursquare ID
CLIENT_SECRET = 'WXAHOZU4KN2MY4SK34SY4RO31AWEN32LIO41CFYBK5WRADNS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: BYYI2T0OESVKZWMHAALNG1HRUKK1GP4IA1XSDTXHX0TBUWBJ
CLIENT_SECRET:WXAHOZU4KN2MY4SK34SY4RO31AWEN32LIO41CFYBK5WRADNS


Define "getNearbyVenues" function from previous assignment

In [40]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):        
        print(name, end=" ")
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        # make the GET request
   #     print(url)
   #     print(requests.get(url).json()["response"])
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postal Code', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Retrieve Gdansk venues

In [41]:
gdansk_venues = getNearbyVenues(names=df_merged['Postal Code'],                                
                                   latitudes=df_merged['Latitude'],
                                   longitudes=df_merged['Longitude']
                                  )


80-180 80-041 80-210 80-207 80-172 80-175 80-116 80-034 80-012 80-119 80-177 80-008 80-178 80-058 80-135 80-045 80-110 80-126 80-046 80-048 80-171 80-071 80-208 80-204 80-124 80-170 80-174 80-203 80-153 80-151 80-108 80-053 80-032 80-010 80-107 80-114 80-156 80-150 80-057 80-169 80-122 80-125 80-141 80-103 80-011 80-113 80-154 80-073 80-068 80-070 80-146 80-133 80-031 80-176 80-137 80-003 80-121 80-037 80-078 80-123 80-063 80-029 80-020 80-056 80-038 80-064 80-152 80-165 80-055 80-035 80-027 80-067 80-007 80-014 80-051 80-074 80-052 80-047 80-134 80-145 80-065 80-075 80-061 80-022 80-044 80-018 80-039 80-001 80-043 80-017 80-077 80-013 80-139 80-155 80-066 80-140 80-036 80-016 80-059 

Check data

In [42]:
gdansk_venues.head()

Unnamed: 0,Postal Code,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,80-180,54.31644,18.58729,Biedronka,54.318463,18.582344,Grocery Store
1,80-180,54.31644,18.58729,Zakoniczyn,54.318534,18.582584,Playground
2,80-180,54.31644,18.58729,"Przystanek ""Niepołomicka""",54.3205,18.593076,Bus Station
3,80-180,54.31644,18.58729,Da Grasso,54.320332,18.594629,Pizza Place
4,80-041,54.32466,18.61394,Lidl,54.326002,18.613221,Grocery Store


In [43]:
gdansk_venues.shape

(1134, 7)

Checking how many venues were returned for each neighborhood

In [44]:
gdansk_venues.groupby('Postal Code').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
80-001,3,3,3,3,3,3
80-003,1,1,1,1,1,1
80-007,1,1,1,1,1,1
80-008,2,2,2,2,2,2
80-010,1,1,1,1,1,1
80-011,1,1,1,1,1,1
80-012,1,1,1,1,1,1
80-013,2,2,2,2,2,2
80-014,1,1,1,1,1,1
80-016,3,3,3,3,3,3


Check the most common venues

In [45]:
# one hot encoding
gdansk_onehot = pd.get_dummies(gdansk_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
gdansk_onehot['Postal Code'] = gdansk_venues['Postal Code'] 

# move neighborhood column to the first column
fixed_columns = [gdansk_onehot.columns[-1]] + list(gdansk_onehot.columns[:-1])
gdansk_onehot = gdansk_onehot[fixed_columns]

gdansk_onehot.head()

Unnamed: 0,Postal Code,Asian Restaurant,BBQ Joint,Bakery,Bar,Bed & Breakfast,Beer Garden,Big Box Store,Boat or Ferry,Bookstore,...,Skating Rink,Soccer Stadium,Sporting Goods Shop,Supermarket,Sushi Restaurant,Theater,Toy / Game Store,Train Station,Tram Station,Vegetarian / Vegan Restaurant
0,80-180,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,80-180,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,80-180,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,80-180,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,80-041,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [46]:
gdansk_grouped = gdansk_onehot.groupby('Postal Code').mean().reset_index()
gdansk_grouped.head()

Unnamed: 0,Postal Code,Asian Restaurant,BBQ Joint,Bakery,Bar,Bed & Breakfast,Beer Garden,Big Box Store,Boat or Ferry,Bookstore,...,Skating Rink,Soccer Stadium,Sporting Goods Shop,Supermarket,Sushi Restaurant,Theater,Toy / Game Store,Train Station,Tram Station,Vegetarian / Vegan Restaurant
0,80-001,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0
1,80-003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,80-007,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,80-008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,80-010,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [47]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [48]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postal Code']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postal Code'] = gdansk_grouped['Postal Code']

for ind in np.arange(gdansk_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(gdansk_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,80-001,Forest,Train Station,Bed & Breakfast,Vegetarian / Vegan Restaurant,Dessert Shop,Diner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space
1,80-003,Sushi Restaurant,Vegetarian / Vegan Restaurant,Forest,Dessert Shop,Diner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Fast Food Restaurant
2,80-007,Bus Station,Vegetarian / Vegan Restaurant,Forest,Diner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Fast Food Restaurant,Food
3,80-008,Bus Station,Department Store,Vegetarian / Vegan Restaurant,Forest,Diner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Fast Food Restaurant
4,80-010,Bus Station,Vegetarian / Vegan Restaurant,Forest,Diner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Fast Food Restaurant,Food


Run *k*-means to cluster the neighborhood into 10 clusters.

In [54]:
kclusters = 10
gdansk_grouped_clustering = gdansk_grouped.drop('Postal Code', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=1).fit(gdansk_grouped_clustering)
kmeans.labels_.shape
gdansk_grouped_clustering.shape

(99, 93)

In [82]:
gdansk_merged = df_merged

gdansk_merged['Cluster Labels'] = kmeans.labels_

gdansk_merged = gdansk_merged.join(neighborhoods_venues_sorted.set_index('Postal Code'), on='Postal Code')

gdansk_merged.head() 

Unnamed: 0,Postal Code,Streets,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,80-180,11 Listopada;Aleksandra Dulin'a;Alfonsa Flisyk...,54.31644,18.58729,7,Bus Station,Pizza Place,Grocery Store,Playground,Vegetarian / Vegan Restaurant,Dessert Shop,Diner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
1,80-041,3 Brygady Szczerbca;Alojzego Bruskiego;Antonie...,54.32466,18.61394,6,Grocery Store,Food & Drink Shop,Diner,Park,Fast Food Restaurant,Bus Station,Vegetarian / Vegan Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
39,80-210,al. Zwycięstwa;41-46Józefa Hoene-Wrońskiego;Ju...,54.36629,18.63048,1,Park,Light Rail Station,Opera House,Skating Rink,Multiplex,Concert Hall,Breakfast Spot,Restaurant,Beer Garden,Italian Restaurant
40,80-207,al. Zwycięstwa;49-59,54.36409,18.63694,9,Scenic Lookout,History Museum,Light Rail Station,Hotel,Music Venue,Historic Site,Event Space,Beer Garden,Park,Concert Hall
50,80-172,Aleksandra Volty;Alfreda Nobla;Blaise'a Pascal...,54.35121,18.59498,1,Bus Station,Light Rail Station,Supermarket,Gym,Grocery Store,Coffee Shop,Gas Station,Fast Food Restaurant,Multiplex,Vegetarian / Vegan Restaurant


### Generating map to visualize  neighborhoods and how they cluster together.

In [83]:
# create map
map_clusters = folium.Map(location=gdansk_loc, zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(gdansk_merged['Latitude'], gdansk_merged['Longitude'], gdansk_merged['Postal Code'], gdansk_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Examine clusters

In [84]:
for i in range(0, kclusters-1):
    print("Cluster {}".format(i))
    print(gdansk_merged.loc[gdansk_merged['Cluster Labels'] == i, gdansk_merged.columns[[1] + list(range(5, gdansk_merged.shape[1]))]])

Cluster 0
                                               Streets 1st Most Common Venue  \
73   Azaliowa;Borówkowa;Chabrowa;Czeremchowa;Damrok...         Grocery Store   
85   Bieszkowicka;Charzykowska;Czerwcowa;Franciszka...           Bus Station   
258  Jana Kozietulskiego;Kolonia Praca;Księcia Józe...                 Hotel   
277  Józefa Pankiewicza;Legnicka;Maksymiliana Giery...                 Hotel   
281                       Junacka;Radunicka;Związkowa;                 Hotel   
287  Karola Kurpińskiego;Otwarta;Piotra Czajkowskie...         Grocery Store   
317                                     Kolonia Zręby;         Grocery Store   

             2nd Most Common Venue 3rd Most Common Venue  \
73             Sporting Goods Shop           Pizza Place   
85   Vegetarian / Vegan Restaurant                Forest   
258                 Science Museum            Restaurant   
277             Light Rail Station           Coffee Shop   
281                    Gas Station               

Check in which cluster dumpling restaurant is the most popular

In [86]:
# one hot encoding
gdansk_onehot_dumplings = pd.get_dummies(gdansk_venues['Venue Category'], prefix="", prefix_sep="")
print(gdansk_onehot_dumplings.shape)
for col in gdansk_onehot_dumplings.columns:
    print(col, end=", ")
    if col != 'Dumpling Restaurant':
        gdansk_onehot_dumplings.drop(columns=col, inplace=True)

# add neighborhood column back to dataframe
gdansk_onehot_dumplings['Postal Code'] = gdansk_venues['Postal Code'] 

gdansk_dumplings_merged = pd.merge(gdansk_onehot_dumplings, gdansk_merged)

for col in gdansk_dumplings_merged.columns:
    if (col != 'Dumpling Restaurant') & (col != 'Cluster Labels') & (col != 'Postal Code'):
        gdansk_dumplings_merged.drop(columns=col, inplace=True)


(1134, 92)
Asian Restaurant, BBQ Joint, Bakery, Bar, Bed & Breakfast, Beer Garden, Big Box Store, Boat or Ferry, Bookstore, Breakfast Spot, Brewery, Burger Joint, Bus Station, Café, Camera Store, Chinese Restaurant, Clothing Store, Cocktail Bar, Coffee Shop, Concert Hall, Convenience Store, Creperie, Cupcake Shop, Department Store, Dessert Shop, Diner, Dumpling Restaurant, Eastern European Restaurant, Electronics Store, Event Space, Fast Food Restaurant, Food, Food & Drink Shop, Forest, Furniture / Home Store, Gas Station, Gourmet Shop, Grocery Store, Gym, Gym / Fitness Center, Harbor / Marina, Historic Site, History Museum, Hostel, Hotel, Irish Pub, Italian Restaurant, Jewelry Store, Kebab Restaurant, Light Rail Station, Locksmith, Lounge, Market, Metro Station, Mexican Restaurant, Miscellaneous Shop, Movie Theater, Multiplex, Museum, Music Venue, Nightclub, Nursery School, Opera House, Park, Pet Store, Pizza Place, Platform, Playground, Plaza, Polish Restaurant, Pool, Pub, Restaurant

In [87]:
df_clusters_dumpling=gdansk_dumplings_merged.groupby('Cluster Labels').mean()

df_clusters_dumpling

Unnamed: 0_level_0,Dumpling Restaurant
Cluster Labels,Unnamed: 1_level_1
0,0.0
1,0.0
2,0.009091
3,0.005747
4,0.0
5,0.0
6,0.0
7,0.0
8,0.015625
9,0.0


Find the best cluster for establishing new dumpling restaurant

In [89]:
best_cluster =df_clusters_dumpling.values.argmax()
print(best_cluster)


8


Check addresses in the cluster, where dumpling restaurant is still not among the most popular venues

In [91]:
gdansk_merged_best_cluster = gdansk_merged.loc[gdansk_merged['Cluster Labels'] == best_cluster, gdansk_merged.columns[[0] + list(range(5, gdansk_merged.shape[1]))]]

In [92]:
gdansk_merged_best_cluster


Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
292,80-125,Bus Station,Gym,Grocery Store,Fast Food Restaurant,Gas Station,Vegetarian / Vegan Restaurant,Diner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
295,80-103,Café,Hotel,Historic Site,Pizza Place,Polish Restaurant,Coffee Shop,Fast Food Restaurant,Chinese Restaurant,Brewery,Theater


In [93]:
gdansk_merged_best_cluster = gdansk_merged_best_cluster.loc[gdansk_merged_best_cluster['1st Most Common Venue'] != 'Dumpling Restaurant']
gdansk_merged_best_cluster = gdansk_merged_best_cluster.loc[gdansk_merged_best_cluster['2nd Most Common Venue'] != 'Dumpling Restaurant']
gdansk_merged_best_cluster = gdansk_merged_best_cluster.loc[gdansk_merged_best_cluster['3rd Most Common Venue'] != 'Dumpling Restaurant']
gdansk_merged_best_cluster = gdansk_merged_best_cluster.loc[gdansk_merged_best_cluster['4th Most Common Venue'] != 'Dumpling Restaurant']
gdansk_merged_best_cluster = gdansk_merged_best_cluster.loc[gdansk_merged_best_cluster['5th Most Common Venue'] != 'Dumpling Restaurant']
gdansk_merged_best_cluster = gdansk_merged_best_cluster.loc[gdansk_merged_best_cluster['6th Most Common Venue'] != 'Dumpling Restaurant']
gdansk_merged_best_cluster = gdansk_merged_best_cluster.loc[gdansk_merged_best_cluster['7th Most Common Venue'] != 'Dumpling Restaurant']
gdansk_merged_best_cluster = gdansk_merged_best_cluster.loc[gdansk_merged_best_cluster['8th Most Common Venue'] != 'Dumpling Restaurant']
gdansk_merged_best_cluster = gdansk_merged_best_cluster.loc[gdansk_merged_best_cluster['8th Most Common Venue'] != 'Dumpling Restaurant']
gdansk_merged_best_cluster = gdansk_merged_best_cluster.loc[gdansk_merged_best_cluster['10th Most Common Venue'] != 'Dumpling Restaurant']

In [94]:
gdansk_merged_best_cluster

Unnamed: 0,Postal Code,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
295,80-103,Café,Hotel,Historic Site,Pizza Place,Polish Restaurant,Coffee Shop,Fast Food Restaurant,Chinese Restaurant,Brewery,Theater


In [95]:
# create map
map_best = folium.Map(location=gdansk_loc, zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster, street in zip(gdansk_merged['Latitude'], gdansk_merged['Longitude'], gdansk_merged['Postal Code'], gdansk_merged['Cluster Labels'], gdansk_merged['Streets']):
    for postalcode in gdansk_merged_best_cluster["Postal Code"]:
        if poi == postalcode:    
            label = folium.Popup(str(street) + ", " + str(poi), parse_html=True)
            folium.CircleMarker(
                [lat, lon],
                radius=5,
                popup=label,
                color=rainbow[cluster-1],
                fill=True,
                fill_color=rainbow[cluster-1],
                fill_opacity=0.7).add_to(map_best)
       
map_best