# Analyzing Potential Location for a New Café

## 1. Download and Explore Dataset

### 1.1 Scrap web page of list of ortsbezirke of Frankfurt am Main

install the BeautifulSoup package

In [1]:
pip install BeautifulSoup4

Collecting BeautifulSoup4
[?25l  Downloading https://files.pythonhosted.org/packages/d1/41/e6495bd7d3781cee623ce23ea6ac73282a373088fcd0ddc809a047b18eae/beautifulsoup4-4.9.3-py3-none-any.whl (115kB)
[K     |████████████████████████████████| 122kB 3.5MB/s eta 0:00:01
[?25hCollecting soupsieve>1.2; python_version >= "3.0" (from BeautifulSoup4)
  Downloading https://files.pythonhosted.org/packages/6f/8f/457f4a5390eeae1cc3aeab89deb7724c965be841ffca6cfca9197482e470/soupsieve-2.0.1-py3-none-any.whl
Installing collected packages: soupsieve, BeautifulSoup4
Successfully installed BeautifulSoup4-4.9.3 soupsieve-2.0.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd # library for data analsysis
import requests
from bs4 import BeautifulSoup

source = requests.get('https://en.wikipedia.org/wiki/List_of_Ortsbezirke_of_Frankfurt_am_Main').text
# transform the data in the table 
soup = BeautifulSoup(source, 'html5lib')
table_rows = soup.tbody.find_all("tr")
res = []
for tr in table_rows:
    td = tr.find_all("td")
    row_roh = [tr.text for tr in td]
    row = [i.strip() for i in row_roh]
    # Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
    if row != [] and "Not assigned" not in row[1]:
        # If a cell has a borough but a "Not assigned" neighborhood, then the neighborhood will be the same as the borough.
        if "Not assigned" in row[2]:
            row[2] = row[1]
        res.append(row)
# Dataframe with 6 columns
df = pd.DataFrame(res, columns = ["No.", "Borough", "Neighborhood","Population","Area","Population Density"])
df.head()

Unnamed: 0,No.,Borough,Neighborhood,Population,Area,Population Density
0,1,Innenstadt I,"Altstadt, Bahnhofsviertel, Gallus, Gutleutvier...",44.183,8987,4.916
1,2,Innenstadt II,"Bockenheim, Westend-Nord, Westend-Süd",57.629,9493,6.071
2,3,Innenstadt III,"Nordend-Ost, Nordend-West",51.671,4744,10.892
3,4,Bornheim/Ostend,"Ostend, Bornheim",56.723,9212,6.158
4,5,Süd,"Flughafen, Niederrad, Oberrad, Sachsenhausen-S...",91.662,84831,1.081


### 1.2 Download the geographical coordinates of neighbors

In [3]:
pip install geopandas

Collecting geopandas
[?25l  Downloading https://files.pythonhosted.org/packages/f7/a4/e66aafbefcbb717813bf3a355c8c4fc3ed04ea1dd7feb2920f2f4f868921/geopandas-0.8.1-py2.py3-none-any.whl (962kB)
[K     |████████████████████████████████| 972kB 8.8MB/s eta 0:00:01
[?25hCollecting pyproj>=2.2.0 (from geopandas)
[?25l  Downloading https://files.pythonhosted.org/packages/e4/ab/280e80a67cfc109d15428c0ec56391fc03a65857b7727cf4e6e6f99a4204/pyproj-3.0.0.post1-cp36-cp36m-manylinux2010_x86_64.whl (6.4MB)
[K     |████████████████████████████████| 6.5MB 8.5MB/s eta 0:00:01                | 1.3MB 8.5MB/s eta 0:00:01
Collecting shapely (from geopandas)
[?25l  Downloading https://files.pythonhosted.org/packages/9d/18/557d4f55453fe00f59807b111cc7b39ce53594e13ada88e16738fb4ff7fb/Shapely-1.7.1-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)
[K     |████████████████████████████████| 1.0MB 33.1MB/s eta 0:00:01
[?25hCollecting fiona (from geopandas)
[?25l  Downloading https://files.pythonhosted.org/packages/

In [4]:
import geopandas as gpd

url = 'https://offenedaten.frankfurt.de/dataset/85b38876-729c-4a78-910c-a52d5c6df8d2/resource/84dff094-ab75-431f-8c64-39606672f1da/download/ffmstadtteilewahlen.geojson'
df_dist = gpd.read_file(url)
print(df_dist.shape)
df_dist.head()

(44, 3)


Unnamed: 0,STTLNR,STTLNAME,geometry
0,1,Altstadt,"POLYGON ((8.68787 50.11416, 8.68789 50.11375, ..."
1,2,Innenstadt,"POLYGON ((8.68683 50.12013, 8.68891 50.11848, ..."
2,4,Westend-Süd,"POLYGON ((8.66070 50.12310, 8.66181 50.12308, ..."
3,5,Westend-Nord,"POLYGON ((8.67113 50.13696, 8.67140 50.13610, ..."
4,6,Nordend-West,"POLYGON ((8.69722 50.13692, 8.69669 50.13579, ..."


#### Use geopy library to get the latitude and longitude values of Frankfurt am Main

In [5]:
!conda install -c conda-forge geopy --yes 

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.9.1
  latest version: 4.9.2

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.11.8  |       ha878542_0         145 KB  conda-forge
    certifi-2020.11.8          |   py36h5fab9bb_0         150 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-2.0.0                |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         392 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forg

In [6]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

address = 'Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitude, longitude))

The geograpical coordinate are 50.1106444, 8.6820917.


#### Create a map of Frankfurt am Main with neighborhoods

In [7]:
import folium 
import random

def random_html_color():
    r = random.randint(0,256)
    g = random.randint(0,256)
    b = random.randint(0,256)
    return '#%02x%02x%02x' % (r, g, b)

def style_fcn(x):
    return { 'fillColor': random_html_color(), 'color': "#000", 'weight' : 0.5}

map_FRAneighbor = folium.Map(location=[latitude, longitude], zoom_start=10)

folium.GeoJson(df_dist['geometry'],style_function=style_fcn).add_to(map_FRAneighbor)
folium.Circle(location=[latitude, longitude], radius=4500, popup='city', color="#008080").add_to(map_FRAneighbor)     # df_neighborGeo.loc[0,'Latitude'],df_neighborGeo.loc[0,'Longitude']

map_FRAneighbor

#### create a neu dataframe to save the coordinates in the city

In [8]:
df_distCity = df_dist.loc[[0,1,8,9,10,3,2,5,4,6,7,14,13,11,12]]
df_distCity.reset_index(inplace=True,drop=True)
df_distCity

Unnamed: 0,STTLNR,STTLNAME,geometry
0,1,Altstadt,"POLYGON ((8.68787 50.11416, 8.68789 50.11375, ..."
1,2,Innenstadt,"POLYGON ((8.68683 50.12013, 8.68891 50.11848, ..."
2,10,Gutleut-/Bahnhofsviertel,"POLYGON ((8.67156 50.10376, 8.66937 50.10247, ..."
3,11,Gallus,"POLYGON ((8.66063 50.10947, 8.66200 50.10882, ..."
4,12,Bockenheim,"POLYGON ((8.64223 50.13694, 8.64258 50.13692, ..."
5,5,Westend-Nord,"POLYGON ((8.67113 50.13696, 8.67140 50.13610, ..."
6,4,Westend-Süd,"POLYGON ((8.66070 50.12310, 8.66181 50.12308, ..."
7,7,Nordend-Ost,"POLYGON ((8.69783 50.11916, 8.69684 50.11830, ..."
8,6,Nordend-West,"POLYGON ((8.69722 50.13692, 8.69669 50.13579, ..."
9,8,Ostend,"POLYGON ((8.74758 50.12123, 8.74726 50.12050, ..."


#### calculate the geographical polygon to center point

In [10]:
import re

distLat = []
distLng = []
for Polygon in df_distCity['geometry']:
    g = re.findall(r"[-+]?\d*\.?\d+|\d+", Polygon.centroid.wkt)
    distLat.append(float(g[1]))
    distLng.append(float(g[0]))
    
d = {'dist': df_distCity['STTLNAME'], 'Latitude': distLat, 'Longitude': distLng}
df_CityNeighborGeo = pd.DataFrame(data = d)
df_CityNeighborGeo.to_csv('FRA_CityDistGeo.csv')
print(df_CityNeighborGeo.shape)
df_CityNeighborGeo.head()

(15, 3)


Unnamed: 0,dist,Latitude,Longitude
0,Altstadt,50.110597,8.682386
1,Innenstadt,50.113791,8.682665
2,Gutleut-/Bahnhofsviertel,50.099673,8.651434
3,Gallus,50.103223,8.635295
4,Bockenheim,50.121288,8.632922


In [11]:
map_FRACityNeighbor = folium.Map(location=[latitude, longitude], zoom_start=10)

folium.GeoJson(df_distCity['geometry'],style_function=style_fcn).add_to(map_FRACityNeighbor)

for lat, lng, dist in zip(df_CityNeighborGeo['Latitude'], df_CityNeighborGeo['Longitude'], df_CityNeighborGeo['dist']):
    label = '{}'.format(dist)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color= '#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_FRACityNeighbor)    
    
map_FRACityNeighbor

#### Because Sachsenhausen-Süd is mostly comprised by the Frankfurt City Forest, so the neu coordinate should be chosen.

In [12]:
# 50.0823657,8.6909661
df_CityNeighborGeo.loc[14,'Latitude'] = 50.0823
df_CityNeighborGeo.loc[14,'Longitude'] = 8.6910
df_CityNeighborGeo.loc[14]

dist         Sachsenhausen-Süd
Latitude               50.0823
Longitude                8.691
Name: 14, dtype: object

In [13]:
map_FRACityNeighbor = folium.Map(location=[latitude, longitude], zoom_start=10)

folium.GeoJson(df_distCity['geometry'],style_function=style_fcn).add_to(map_FRACityNeighbor)

for lat, lng, dist in zip(df_CityNeighborGeo['Latitude'], df_CityNeighborGeo['Longitude'], df_CityNeighborGeo['dist']):
    label = '{}'.format(dist)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color= '#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_FRACityNeighbor)    
    
map_FRACityNeighbor

## 2.Explore Neighborhoods in Frankfurt am Main

#### Define Foursquare Credentials and Version

In [14]:
CLIENT_ID = 'H0CBQIXYOGFKN13OASIN4DE0JSXNQ2ZF23LB2SHNHJCRVIHD' # Foursquare ID
CLIENT_SECRET = 'MU4EEK2CCMTVJLGZOWJXSTTLL2NEACQOITGUUDAUB34YZEUP' # Foursquare Secret
VERSION = '20180604'
LIMIT = 250
radius = 2000

#### create a function to get the venues to each neighborhoods

In [15]:
def getNearbyVenues(names, latitudes, longitudes):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
#        print(name)
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
                CLIENT_ID,
                CLIENT_SECRET,
                VERSION,
                lat,
                lng,
                radius,
                LIMIT)
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
#         results = json_normalize(results)
#        if name == 'Altstadt':
#            print(results)

        # return only relevant information for each nearby venue
        venues_list.append([(
                name,
                lat,
                lng,
                v['venue']['name'],
#                v['venue']['location']['address'],                
                v['venue']['location']['lat'],
                v['venue']['location']['lng'],
                v['venue']['location']['distance'],
                v['venue']['location']['formattedAddress'][0],
#                v['venue']['location']['neighborhood'],
                v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
#     print(nearby_venues)
    nearby_venues.columns = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude', 'Distance', 'Address', 'Venue Category']
    
    return(nearby_venues)

#### create a new dataframe called DistVenues

In [16]:
DistVenues = getNearbyVenues(names=df_CityNeighborGeo['dist'],latitudes=df_CityNeighborGeo['Latitude'],longitudes=df_CityNeighborGeo['Longitude'])
print(DistVenues.shape)
DistVenues.head()

(1389, 9)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Address,Venue Category
0,Altstadt,50.110597,8.682386,Kleinmarkthalle,50.112778,8.682958,246,Hasengasse 5-7,Market
1,Altstadt,50.110597,8.682386,SCHIRN Kunsthalle,50.110291,8.683542,89,Römerberg 6 (Bendergasse),Art Museum
2,Altstadt,50.110597,8.682386,Römerberg,50.110489,8.682131,21,Römerberg,Plaza
3,Altstadt,50.110597,8.682386,Weinterasse Rollanderhof,50.112473,8.682164,209,Hasengasse 5-7,Wine Bar
4,Altstadt,50.110597,8.682386,Góc Phố,50.113509,8.681686,328,Schärfengäßchen 6 (Holzgraben),Vietnamese Restaurant


In [56]:
DistVenues.to_csv('VenuesEachCityDist.csv')

#### Check if there are redundant data

In [17]:
DistVenues = DistVenues.drop_duplicates(subset=['Venue', 'Venue Latitude', 'Venue Longitude'], keep='last').reset_index(drop=True)
print(DistVenues.shape)

(721, 9)


In [18]:
DistVenues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Address,Venue Category
0,Innenstadt,50.113791,8.682665,Zeil,50.114634,8.684049,136,Zeil 1-116,Pedestrian Plaza
1,Innenstadt,50.113791,8.682665,Café Karin,50.111621,8.678097,405,Großer Hirschgraben 28,Café
2,Innenstadt,50.113791,8.682665,Goetheplatz,50.112584,8.676767,441,Goetheplatz,Plaza
3,Innenstadt,50.113791,8.682665,Mikuni,50.113125,8.68611,256,Fahrgasse 91-95,Japanese Restaurant
4,Innenstadt,50.113791,8.682665,COS,50.113553,8.676689,427,Goetheplatz 4,Clothing Store


#### Check 'Venue Category' with 'Neighborhood'

In [19]:
L1 = DistVenues.index[DistVenues['Venue Category'] == 'Neighborhood'].tolist()
L1

[]

#### Drop rows of 'Venue Category' with 'Neighborhood'

In [20]:
DistVenues = DistVenues.drop(L1).reset_index(drop=True)
DistVenues.shape

(721, 9)

In [60]:
DistVenues.to_csv('VenuesEachCityDistNoDupli.csv')

#### Cleaning category

In [21]:
L = DistVenues.index[DistVenues['Venue Category'] == 'Japanese Restaurant'].tolist()
L

[3, 33, 141, 165, 192, 206, 220, 363, 521, 645]

In [22]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Sushi Restaurant'].tolist()
L2

[247, 254, 448, 565, 649, 701]

In [23]:
DistVenues['Venue Category'].replace('Sushi Restaurant','Japanese Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Sushi Restaurant'].tolist()
L2

[]

In [24]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Ice Cream Shop'].tolist()
L2

[49, 266, 334, 381, 384, 400, 490, 534, 602, 651, 656]

In [25]:
DistVenues['Venue Category'].replace('Ice Cream Shop','Coffee Shop',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Ice Cream Shop'].tolist()
L2

[]

In [26]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Café'].tolist()

In [27]:
DistVenues['Venue Category'].replace('Café','Coffee Shop',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Café'].tolist()
L2

[]

In [28]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Trattoria/Osteria'].tolist()
L2

[20, 356, 588]

In [29]:
DistVenues['Venue Category'].replace('Trattoria/Osteria','Italian Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Trattoria/Osteria'].tolist()
L2

[]

In [30]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Tea Room'].tolist()
L2

[389]

In [71]:
#DistVenues.loc[149,'Venue Category'] = 'Teahouse' #.replace('Tea Room', 'Teahouse',inplace=True)
DistVenues.loc[L2]

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Address,Venue Category
389,Bornheim,50.130908,8.712408,Phoenix Tea,50.124285,8.691866,1640,Friedberger Landstr. 82,Tea Room


In [31]:
DistVenues.loc[L2,'Venue Category'] = 'Teahouse' #.replace('Tea Room', 'Teahouse',inplace=True)
DistVenues.loc[L2]

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Address,Venue Category
389,Bornheim,50.130908,8.712408,Phoenix Tea,50.124285,8.691866,1640,Friedberger Landstr. 82,Teahouse


In [32]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Tapas Restaurant'].tolist()
L2

[121, 358]

In [33]:
DistVenues['Venue Category'].replace('Tapas Restaurant','Spanish Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Tapas Restaurant'].tolist()
L2

[]

In [34]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Breakfast Spot'].tolist()
L2

[229]

In [35]:
DistVenues['Venue Category'].replace('Breakfast Spot','Coffee Shop',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Breakfast Spot'].tolist()
L2

[]

In [36]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Brewery'].tolist()
L2

[265, 695, 703]

In [37]:
DistVenues['Venue Category'].replace('Brewery','German Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Brewery'].tolist()
L2

[]

In [38]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Beer Garden'].tolist()
L2

[424, 437, 606]

In [39]:
DistVenues['Venue Category'].replace('Beer Garden','German Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Beer Garden'].tolist()
L2

[]

In [40]:
# Permanently closed
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Cajun / Creole Restaurant'].tolist()
DistVenues = DistVenues.drop(L2).reset_index(drop=True)
DistVenues.shape

(721, 9)

In [41]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Dessert Shop'].tolist()
L2

[303]

In [42]:
DistVenues['Venue Category'].replace('Dessert Shop','Coffee Shop',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Dessert Shop'].tolist()
L2

[]

In [46]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Doner Restaurant'].tolist()
L2

[170, 537, 696]

In [47]:
DistVenues['Venue Category'].replace('Doner Restaurant','Turkish Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Doner Restaurant'].tolist()
L2

[]

In [48]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Falafel Restaurant'].tolist()
L2

[258, 341, 586]

In [49]:
DistVenues['Venue Category'].replace('Falafel Restaurant','Turkish Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Falafel Restaurant'].tolist()
L2

[]

In [50]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Fried Chicken Joint'].tolist()
L2

[]

In [51]:
DistVenues['Venue Category'].replace('Fried Chicken Joint','Fast Food Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Fried Chicken Joint'].tolist()
L2

[]

In [52]:
L2 = DistVenues.index[DistVenues['Venue'] == 'Soul Food Factory East'].tolist()
DistVenues.loc[L2, 'Venue Category'] = 'American Restaurant'
DistVenues.loc[L2]

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Address,Venue Category
522,Oberrad,50.099241,8.727156,Soul Food Factory East,50.111359,8.711125,1769,Hanauer Landstr. 124,American Restaurant


In [53]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Currywurst Joint'].tolist()
L2

[63, 156, 365, 515]

In [54]:
DistVenues['Venue Category'].replace('Currywurst Joint','Fast Food Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Currywurst Joint'].tolist()
L2

[]

In [55]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Apple Wine Pub'].tolist()
L2

[291, 327, 597, 599, 610, 622]

In [56]:
DistVenues['Venue Category'].replace('Apple Wine Pub','German Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Apple Wine Pub'].tolist()
L2

[]

In [57]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Bratwurst Joint'].tolist()
L2

[]

In [58]:
DistVenues['Venue Category'].replace('Bratwurst Joint','Fast Food Restaurant',inplace=True)
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Bratwurst Joint'].tolist()
L2

[]

In [59]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Comfort Food Restaurant'].tolist()
DistVenues.loc[L2, 'Venue Category'] = 'Balkan cuisine Restaurant'
DistVenues.loc[L2]

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Address,Venue Category


In [60]:
L2 = DistVenues.index[DistVenues['Venue Category'] == 'Taco Place'].tolist()
DistVenues.loc[L2, 'Venue Category'] = 'Mexican Restaurant'
DistVenues.loc[L2]

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Address,Venue Category
105,Bockenheim,50.121288,8.632922,Tacohaus,50.116657,8.643817,932,Hamburger Allee 59,Mexican Restaurant


#### check how many venues were returned for each neighborhood

In [61]:
DistVenuesOrg = pd.read_csv('VenuesEachCityDistNoDupli.csv')

In [62]:
df_VenuesProDist = DistVenuesOrg.groupby('Neighborhood').count()
df_VenuesProDist.head()

Unnamed: 0_level_0,Unnamed: 0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Address,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bockenheim,71,71,71,71,71,71,71,71,71
Bornheim,98,98,98,98,98,98,98,98,98
Gallus,56,56,56,56,56,56,56,56,56
Gutleut-/Bahnhofsviertel,17,17,17,17,17,17,17,17,17
Innenstadt,12,12,12,12,12,12,12,12,12


#### find out how many unique categories can be curated from all the returned venues

In [63]:
print('There are {} uniques categories.'.format(len(DistVenuesOrg['Venue Category'].unique())))

There are 180 uniques categories.


#### find out all categories for catering

In [64]:
#DistVenues.shape[0]
DistCaterings = DistVenues.copy()
#DistCaterings.shape[0]
for i in range(0,DistCaterings.shape[0]):
#    print(DistCaterings.loc[i,'Venue Category'])
    if 'Restaurant' in DistCaterings.loc[i,'Venue Category']:
        continue
    if 'Bar' in DistCaterings.loc[i,'Venue Category']:
        if DistCaterings.loc[i,'Venue Category'] == 'Hookah Bar': #, 'Hotel Bar', 'Karaoke Bar',
            DistCaterings.drop(index=i,inplace=True)
        elif DistCaterings.loc[i,'Venue Category'] == 'Hotel Bar':
            DistCaterings.drop(index=i,inplace=True) 
        elif DistCaterings.loc[i,'Venue Category'] == 'Karaoke Bar':
            DistCaterings.drop(index=i,inplace=True)
        continue
    if 'Place' in DistCaterings.loc[i,'Venue Category']:
        continue
    if 'Pub' in DistCaterings.loc[i,'Venue Category']:
        continue    
    if 'Joint' in DistCaterings.loc[i,'Venue Category']:
        continue
    if DistCaterings.loc[i,'Venue Category'] == 'Coffee Shop':
        continue
    if DistCaterings.loc[i,'Venue Category'] == 'Teahouse':
        continue
    if DistCaterings.loc[i,'Venue Category'] == 'Bakery':
        continue
    if DistCaterings.loc[i,'Venue Category'] == 'Steakhouse':
        continue
    DistCaterings.drop(index=i,inplace=True)
DistCaterings.reset_index(drop=True,inplace=True)
print('caterings:',DistCaterings.shape, 'all venues:',DistVenues.shape)
DistCaterings.head()


caterings: (406, 9) all venues: (721, 9)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Distance,Address,Venue Category
0,Innenstadt,50.113791,8.682665,Café Karin,50.111621,8.678097,405,Großer Hirschgraben 28,Coffee Shop
1,Innenstadt,50.113791,8.682665,Mikuni,50.113125,8.68611,256,Fahrgasse 91-95,Japanese Restaurant
2,Innenstadt,50.113791,8.682665,Walden,50.111667,8.67825,393,Kleiner Hirschgraben 7 (Weißadlergasse),Restaurant
3,Innenstadt,50.113791,8.682665,Paulaner am Dom,50.110876,8.685925,399,Domplatz 6,German Restaurant
4,Gutleut-/Bahnhofsviertel,50.099673,8.651434,Dorade am Main,50.097209,8.664123,946,Dorade am Main Carl-von-Noorden Platz 5,Seafood Restaurant


In [65]:
DistCaterings['Venue Category'].unique()

array(['Coffee Shop', 'Japanese Restaurant', 'Restaurant',
       'German Restaurant', 'Seafood Restaurant', 'Asian Restaurant',
       'Malay Restaurant', 'Vietnamese Restaurant', 'Italian Restaurant',
       'Moroccan Restaurant', 'Modern European Restaurant', 'Pizza Place',
       'Burger Joint', 'Greek Restaurant', 'Korean Restaurant',
       'Mexican Restaurant', 'Turkish Restaurant', 'Thai Restaurant',
       'Fast Food Restaurant', 'Bakery', 'Sandwich Place',
       'African Restaurant', 'Piano Bar', 'Bar',
       'Middle Eastern Restaurant', 'Vegetarian / Vegan Restaurant',
       'Persian Restaurant', 'Spanish Restaurant', 'Pub', 'Cocktail Bar',
       'Steakhouse', 'Ramen Restaurant', 'Chinese Restaurant',
       'Mediterranean Restaurant', 'French Restaurant',
       'Indian Restaurant', 'Wine Bar', 'Portuguese Restaurant',
       'American Restaurant', 'Ethiopian Restaurant',
       'Argentinian Restaurant', 'Tibetan Restaurant', 'Czech Restaurant',
       'Irish Pub', 'Eas

In [66]:
print('There are {} uniques caterings.'.format(len(DistCaterings['Venue Category'].unique())))

There are 50 uniques caterings.


## 3. Analyze Each Neighborhood

### 3.1 Analyze each neighborhood with all venues

In [67]:
# one hot encoding
Dist_onehot = pd.get_dummies(DistVenuesOrg[['Venue Category']], prefix="", prefix_sep="")
# add neighborhood column back to dataframe
Dist_onehot['Neighborhood'] = DistVenuesOrg['Neighborhood']
# move neighborhood column to the first column
fixed_columns = [Dist_onehot.columns[-1]] + list(Dist_onehot.columns[:-1])
Dist_onehot = Dist_onehot[fixed_columns]
print(Dist_onehot.shape)
Dist_onehot.head()

(721, 181)


Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Apple Wine Pub,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Toy / Game Store,Train Station,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Waterfront,Wine Bar,Zoo
0,Innenstadt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Innenstadt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Innenstadt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Innenstadt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Innenstadt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [114]:
Dist_onehot.to_csv('StatisticCategoryCityDist.csv')

#### group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [68]:
dist_grouped = Dist_onehot.groupby('Neighborhood').mean().reset_index()
print(dist_grouped.shape)
dist_grouped.head()

(14, 181)


Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Apple Wine Pub,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Toy / Game Store,Train Station,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Waterfront,Wine Bar,Zoo
0,Bockenheim,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.028169,0.0,...,0.0,0.0,0.0,0.0,0.042254,0.014085,0.0,0.0,0.0,0.0
1,Bornheim,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.010204,0.0,...,0.0,0.0,0.0,0.010204,0.0,0.0,0.010204,0.0,0.020408,0.0
2,Gallus,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,...,0.0,0.0,0.0,0.0,0.017857,0.0,0.017857,0.0,0.0,0.0
3,Gutleut-/Bahnhofsviertel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.058824,...,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.0
4,Innenstadt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### write a function to sort the venues in descending order

In [69]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### create the new dataframe and display the top 10 venues for each neighborhood

In [70]:
import numpy as np

num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = dist_grouped['Neighborhood']

for ind in np.arange(dist_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dist_grouped.iloc[ind, :], num_top_venues)
    
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bockenheim,Café,Italian Restaurant,Hotel,Supermarket,Thai Restaurant,Turkish Restaurant,Bakery,Park,Spanish Restaurant,Drugstore
1,Bornheim,Café,Italian Restaurant,German Restaurant,Ice Cream Shop,Plaza,Gym / Fitness Center,Park,Pub,Bakery,Pizza Place
2,Gallus,Italian Restaurant,Restaurant,Hotel,Coffee Shop,Supermarket,Drugstore,Bakery,Café,Pizza Place,Park
3,Gutleut-/Bahnhofsviertel,Hotel,Seafood Restaurant,Asian Restaurant,Malay Restaurant,Moroccan Restaurant,Bistro,Café,Athletics & Sports,Italian Restaurant,Trattoria/Osteria
4,Innenstadt,Boutique,Plaza,Pedestrian Plaza,Japanese Restaurant,Monument / Landmark,Restaurant,Café,Clothing Store,German Restaurant,Ethiopian Restaurant


### 3.2 Analyze each neighborhood only with caterings

In [71]:
# one hot encoding
DistCaterings_onehot = pd.get_dummies(DistCaterings[['Venue Category']], prefix="", prefix_sep="")
# add neighborhood column back to dataframe
DistCaterings_onehot['Neighborhood'] = DistCaterings['Neighborhood']
# move neighborhood column to the first column
fixed_columns = [DistCaterings_onehot.columns[-1]] + list(DistCaterings_onehot.columns[:-1])
DistCaterings_onehot = DistCaterings_onehot[fixed_columns]
print(DistCaterings_onehot.shape)
DistCaterings_onehot.head()

(406, 51)


Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bakery,Bar,Beer Bar,Burger Joint,...,Soup Place,Spanish Restaurant,Steakhouse,Teahouse,Thai Restaurant,Tibetan Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar
0,Innenstadt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Innenstadt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Innenstadt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Innenstadt,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Gutleut-/Bahnhofsviertel,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [72]:
DistCaterings_grouped = DistCaterings_onehot.groupby('Neighborhood').mean().reset_index()
print(DistCaterings_grouped.shape)
DistCaterings_grouped.head()

(14, 51)


Unnamed: 0,Neighborhood,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,BBQ Joint,Bakery,Bar,Beer Bar,Burger Joint,...,Soup Place,Spanish Restaurant,Steakhouse,Teahouse,Thai Restaurant,Tibetan Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar
0,Bockenheim,0.0,0.0,0.0,0.047619,0.0,0.047619,0.047619,0.0,0.02381,...,0.0,0.071429,0.0,0.0,0.071429,0.0,0.071429,0.02381,0.0,0.0
1,Bornheim,0.0,0.0,0.0,0.012987,0.0,0.038961,0.025974,0.0,0.012987,...,0.0,0.038961,0.0,0.012987,0.012987,0.0,0.012987,0.0,0.012987,0.025974
2,Gallus,0.027778,0.0,0.0,0.055556,0.0,0.055556,0.0,0.0,0.027778,...,0.0,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.027778,0.0
3,Gutleut-/Bahnhofsviertel,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0
4,Innenstadt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### create the new dataframe and display the top 10 caterings for each neighborhood

In [73]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
# create a new dataframe
neighborhoods_caterings_sorted = pd.DataFrame(columns=columns)
neighborhoods_caterings_sorted['Neighborhood'] = DistCaterings_grouped['Neighborhood']

for ind in np.arange(DistCaterings_grouped.shape[0]):
    neighborhoods_caterings_sorted.iloc[ind, 1:] = return_most_common_venues(DistCaterings_grouped.iloc[ind, :], num_top_venues)
    
neighborhoods_caterings_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bockenheim,Coffee Shop,Italian Restaurant,Turkish Restaurant,Thai Restaurant,Spanish Restaurant,Pizza Place,Mexican Restaurant,Asian Restaurant,Bakery,Bar
1,Bornheim,Coffee Shop,Italian Restaurant,German Restaurant,Bakery,Pub,Spanish Restaurant,Wine Bar,Mexican Restaurant,Pizza Place,Restaurant
2,Gallus,Coffee Shop,Italian Restaurant,Restaurant,Pizza Place,Asian Restaurant,Bakery,Seafood Restaurant,Korean Restaurant,Burger Joint,Fast Food Restaurant
3,Gutleut-/Bahnhofsviertel,Italian Restaurant,Seafood Restaurant,Vietnamese Restaurant,Asian Restaurant,Malay Restaurant,Modern European Restaurant,Moroccan Restaurant,Coffee Shop,Ethiopian Restaurant,Indian Restaurant
4,Innenstadt,Restaurant,Coffee Shop,German Restaurant,Japanese Restaurant,Irish Pub,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant,Fast Food Restaurant


## 4. Cluster Neighborhoods

### 4.1 Cluster Neighborhoods only with caterings

#### Run k-means to cluster the neighborhood into clusters only with caterings

In [74]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 3
DistCaterings_grouped_clustering = DistCaterings_grouped.drop('Neighborhood', 1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(DistCaterings_grouped_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 2, 0, 2, 0, 1, 2, 1], dtype=int32)

#### drop the neighborhoods that are not in the dataframe 'DistCaterings_grouped'

In [75]:
df_CityNeighborGeo.shape

(15, 3)

In [76]:
neighborGeo = df_CityNeighborGeo.copy()
neighborGeo.rename(columns={"dist": "Neighborhood"},inplace=True)
print(neighborGeo.shape)
neighborGeo.head()

(15, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Altstadt,50.110597,8.682386
1,Innenstadt,50.113791,8.682665
2,Gutleut-/Bahnhofsviertel,50.099673,8.651434
3,Gallus,50.103223,8.635295
4,Bockenheim,50.121288,8.632922


In [77]:
for x in neighborGeo['Neighborhood']:
    
    if x not in neighborhoods_caterings_sorted['Neighborhood'].tolist():
        l = neighborGeo.index[neighborGeo['Neighborhood'] == x].tolist()
        neighborGeo.drop(l,inplace=True)

print(neighborGeo.shape)
neighborGeo.head()

(14, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
1,Innenstadt,50.113791,8.682665
2,Gutleut-/Bahnhofsviertel,50.099673,8.651434
3,Gallus,50.103223,8.635295
4,Bockenheim,50.121288,8.632922
5,Westend-Nord,50.12877,8.666489


#### create a new dataframe that includes the cluster as well as the top 10 caterings for each neighborhood

In [78]:
# add clustering labels
neighborhoods_caterings_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
neighborhoods_catering_merged = neighborGeo
# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
neighborhoods_catering_merged = neighborhoods_catering_merged.join(neighborhoods_caterings_sorted.set_index('Neighborhood'), on='Neighborhood')
neighborhoods_catering_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Innenstadt,50.113791,8.682665,0,Restaurant,Coffee Shop,German Restaurant,Japanese Restaurant,Irish Pub,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant,Fast Food Restaurant
2,Gutleut-/Bahnhofsviertel,50.099673,8.651434,2,Italian Restaurant,Seafood Restaurant,Vietnamese Restaurant,Asian Restaurant,Malay Restaurant,Modern European Restaurant,Moroccan Restaurant,Coffee Shop,Ethiopian Restaurant,Indian Restaurant
3,Gallus,50.103223,8.635295,1,Coffee Shop,Italian Restaurant,Restaurant,Pizza Place,Asian Restaurant,Bakery,Seafood Restaurant,Korean Restaurant,Burger Joint,Fast Food Restaurant
4,Bockenheim,50.121288,8.632922,1,Coffee Shop,Italian Restaurant,Turkish Restaurant,Thai Restaurant,Spanish Restaurant,Pizza Place,Mexican Restaurant,Asian Restaurant,Bakery,Bar
5,Westend-Nord,50.12877,8.666489,0,Italian Restaurant,Turkish Restaurant,Fast Food Restaurant,Burger Joint,Restaurant,Japanese Restaurant,Coffee Shop,Eastern European Restaurant,Irish Pub,Indian Restaurant


#### visualize the resulting clusters

In [79]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)
# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(neighborhoods_catering_merged['Latitude'], neighborhoods_catering_merged['Longitude'], neighborhoods_catering_merged['Neighborhood'], neighborhoods_catering_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    
map_clusters


### 4.2 Cluster Neighborhoods with all venues

#### Run k-means to cluster the neighborhood into 6 clusters

In [80]:
# set number of clusters
kclusters = 3
dist_grouped_clustering = dist_grouped.drop('Neighborhood', 1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dist_grouped_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 2, 0, 0, 1, 0, 0, 2, 0, 2], dtype=int32)

#### create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [81]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
neighborhoods_venues_merged = neighborGeo
# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
neighborhoods_venues_merged = neighborhoods_venues_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
neighborhoods_venues_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Innenstadt,50.113791,8.682665,1,Boutique,Plaza,Pedestrian Plaza,Japanese Restaurant,Monument / Landmark,Restaurant,Café,Clothing Store,German Restaurant,Ethiopian Restaurant
2,Gutleut-/Bahnhofsviertel,50.099673,8.651434,0,Hotel,Seafood Restaurant,Asian Restaurant,Malay Restaurant,Moroccan Restaurant,Bistro,Café,Athletics & Sports,Italian Restaurant,Trattoria/Osteria
3,Gallus,50.103223,8.635295,0,Italian Restaurant,Restaurant,Hotel,Coffee Shop,Supermarket,Drugstore,Bakery,Café,Pizza Place,Park
4,Bockenheim,50.121288,8.632922,2,Café,Italian Restaurant,Hotel,Supermarket,Thai Restaurant,Turkish Restaurant,Bakery,Park,Spanish Restaurant,Drugstore
5,Westend-Nord,50.12877,8.666489,0,Indie Movie Theater,Restaurant,Food & Drink Shop,Deli / Bodega,Speakeasy,Market,Burger Joint,Road,Gym / Fitness Center,Gourmet Shop


#### visualize the resulting clusters

In [82]:
# create map
map_clusters_venues = folium.Map(location=[latitude, longitude], zoom_start=11)
# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(neighborhoods_venues_merged['Latitude'], neighborhoods_venues_merged['Longitude'], neighborhoods_venues_merged['Neighborhood'], neighborhoods_venues_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_venues)
    
map_clusters_venues

## 5. Examine Clusters

#### examine each cluster and determine the discriminating venue categories that distinguish each cluster

### 5.1 Neighborhoods only with caterings

#### Cluster 1

In [83]:
neighborhoods_catering_merged.loc[neighborhoods_catering_merged['Cluster Labels'] == 0, neighborhoods_catering_merged.columns[[0] + list(range(4, neighborhoods_catering_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Innenstadt,Restaurant,Coffee Shop,German Restaurant,Japanese Restaurant,Irish Pub,Indian Restaurant,Hawaiian Restaurant,Greek Restaurant,French Restaurant,Fast Food Restaurant
5,Westend-Nord,Italian Restaurant,Turkish Restaurant,Fast Food Restaurant,Burger Joint,Restaurant,Japanese Restaurant,Coffee Shop,Eastern European Restaurant,Irish Pub,Indian Restaurant
7,Nordend-Ost,Japanese Restaurant,Restaurant,Steakhouse,Coffee Shop,Wine Bar,Turkish Restaurant,American Restaurant,Asian Restaurant,Eastern European Restaurant,Irish Pub


#### Cluster 2

In [84]:
neighborhoods_catering_merged.loc[neighborhoods_catering_merged['Cluster Labels'] == 1, neighborhoods_catering_merged.columns[[0] + list(range(4, neighborhoods_catering_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Gallus,Coffee Shop,Italian Restaurant,Restaurant,Pizza Place,Asian Restaurant,Bakery,Seafood Restaurant,Korean Restaurant,Burger Joint,Fast Food Restaurant
4,Bockenheim,Coffee Shop,Italian Restaurant,Turkish Restaurant,Thai Restaurant,Spanish Restaurant,Pizza Place,Mexican Restaurant,Asian Restaurant,Bakery,Bar
6,Westend-Süd,Indian Restaurant,Coffee Shop,Italian Restaurant,Steakhouse,Japanese Restaurant,Bakery,Pizza Place,French Restaurant,Asian Restaurant,Bar
8,Nordend-West,Coffee Shop,Italian Restaurant,German Restaurant,Bakery,Indian Restaurant,Vietnamese Restaurant,Greek Restaurant,Mediterranean Restaurant,French Restaurant,Ethiopian Restaurant
9,Ostend,Thai Restaurant,Italian Restaurant,Coffee Shop,Pizza Place,Argentinian Restaurant,Tibetan Restaurant,German Restaurant,Bar,Wine Bar,Indian Restaurant
10,Bornheim,Coffee Shop,Italian Restaurant,German Restaurant,Bakery,Pub,Spanish Restaurant,Wine Bar,Mexican Restaurant,Pizza Place,Restaurant
13,Sachsenhausen-Nord,Coffee Shop,German Restaurant,Bar,Wine Bar,Italian Restaurant,Burger Joint,Thai Restaurant,Japanese Restaurant,Vietnamese Restaurant,Bakery


#### Cluster 3

In [85]:
neighborhoods_catering_merged.loc[neighborhoods_catering_merged['Cluster Labels'] == 2, neighborhoods_catering_merged.columns[[0] + list(range(4, neighborhoods_catering_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Gutleut-/Bahnhofsviertel,Italian Restaurant,Seafood Restaurant,Vietnamese Restaurant,Asian Restaurant,Malay Restaurant,Modern European Restaurant,Moroccan Restaurant,Coffee Shop,Ethiopian Restaurant,Indian Restaurant
11,Niederrad,Italian Restaurant,German Restaurant,Indian Restaurant,Restaurant,Japanese Restaurant,Soup Place,Portuguese Restaurant,Burger Joint,Greek Restaurant,Bar
12,Oberrad,Italian Restaurant,German Restaurant,Pizza Place,Coffee Shop,Restaurant,American Restaurant,Japanese Restaurant,Cocktail Bar,Vietnamese Restaurant,Hawaiian Restaurant
14,Sachsenhausen-Süd,German Restaurant,Italian Restaurant,Bakery,Japanese Restaurant,Greek Restaurant,Coffee Shop,Restaurant,Bar,Korean Restaurant,Steakhouse


### 5.2 Neighborhood with all venues

#### Cluster 1

In [86]:
neighborhoods_venues_merged.loc[neighborhoods_venues_merged['Cluster Labels'] == 0, neighborhoods_venues_merged.columns[[0] + list(range(4, neighborhoods_venues_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Gutleut-/Bahnhofsviertel,Hotel,Seafood Restaurant,Asian Restaurant,Malay Restaurant,Moroccan Restaurant,Bistro,Café,Athletics & Sports,Italian Restaurant,Trattoria/Osteria
3,Gallus,Italian Restaurant,Restaurant,Hotel,Coffee Shop,Supermarket,Drugstore,Bakery,Café,Pizza Place,Park
5,Westend-Nord,Indie Movie Theater,Restaurant,Food & Drink Shop,Deli / Bodega,Speakeasy,Market,Burger Joint,Road,Gym / Fitness Center,Gourmet Shop
6,Westend-Süd,Hotel,Indian Restaurant,Café,Steakhouse,Italian Restaurant,Bakery,Botanical Garden,Japanese Restaurant,French Restaurant,Chinese Restaurant
7,Nordend-Ost,Restaurant,Sushi Restaurant,Coffee Shop,Toy / Game Store,Wine Bar,Fountain,Clothing Store,Steakhouse,Supermarket,Hotel
11,Niederrad,Hotel,Supermarket,Italian Restaurant,Soccer Stadium,Beer Garden,Indian Restaurant,Restaurant,Gas Station,Nightclub,Train Station
12,Oberrad,Hotel,Supermarket,Pizza Place,Italian Restaurant,German Restaurant,Tram Station,Gym / Fitness Center,Nightclub,Art Gallery,Escape Room
14,Sachsenhausen-Süd,Supermarket,Italian Restaurant,Tram Station,Bakery,German Restaurant,Brewery,Playground,Hotel,Gastropub,Restaurant


#### Cluster 2

In [87]:
neighborhoods_venues_merged.loc[neighborhoods_venues_merged['Cluster Labels'] == 1, neighborhoods_venues_merged.columns[[0] + list(range(4, neighborhoods_venues_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Innenstadt,Boutique,Plaza,Pedestrian Plaza,Japanese Restaurant,Monument / Landmark,Restaurant,Café,Clothing Store,German Restaurant,Ethiopian Restaurant


#### Cluster 3

In [88]:
neighborhoods_venues_merged.loc[neighborhoods_venues_merged['Cluster Labels'] == 2, neighborhoods_venues_merged.columns[[0] + list(range(4, neighborhoods_venues_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Bockenheim,Café,Italian Restaurant,Hotel,Supermarket,Thai Restaurant,Turkish Restaurant,Bakery,Park,Spanish Restaurant,Drugstore
8,Nordend-West,Café,Lounge,Italian Restaurant,Park,Hotel,Bakery,Apple Wine Pub,Falafel Restaurant,Garden,Drugstore
9,Ostend,Thai Restaurant,Italian Restaurant,Café,Zoo,German Restaurant,Nightclub,Outdoor Supply Store,Park,Pizza Place,Big Box Store
10,Bornheim,Café,Italian Restaurant,German Restaurant,Ice Cream Shop,Plaza,Gym / Fitness Center,Park,Pub,Bakery,Pizza Place
13,Sachsenhausen-Nord,Café,Art Museum,German Restaurant,Bar,Apple Wine Pub,Waterfront,Ice Cream Shop,Hotel,Wine Bar,Burger Joint


## 6. Locations of Schools in Sachsenhausen-Nord

In [90]:
address = 'Bergiusschule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeB = location.latitude
longitudeB = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeB, longitudeB))

The geograpical coordinate are 50.1066715, 8.6917217.


In [93]:
address = 'Deutschherrenschule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeD = location.latitude
longitudeD = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeD, longitudeD))

The geograpical coordinate are 50.10427525, 8.69448825425006.


In [97]:
# Freiherr-vom-Stein-Schule
address = 'Schweizer Str. 87,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeF = location.latitude
longitudeF = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeF, longitudeF))

The geograpical coordinate are 50.0995303, 8.683614347459763.


In [98]:
# Mühlbergschule
address = 'Mühlbergschule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeM = location.latitude
longitudeM = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeM, longitudeM))

The geograpical coordinate are 50.098909649999996, 8.70205006461438.


In [99]:
# Textorschule
address = 'Textorschule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeT = location.latitude
longitudeT = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeT, longitudeT))

The geograpical coordinate are 50.1025206, 8.68165180030677.


In [100]:
address = 'Willemerschule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeW = location.latitude
longitudeW = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeW, longitudeW))

The geograpical coordinate are 50.103885500000004, 8.69257963938556.


In [101]:
# Holbeinschule
address = 'Holbeinschule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeH = location.latitude
longitudeH = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeH, longitudeH))

The geograpical coordinate are 50.09864055, 8.679147674774775.


In [102]:
address = 'IGS Süd,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeI = location.latitude
longitudeI = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeI, longitudeI))

The geograpical coordinate are 50.0995283, 8.678996507360434.


In [103]:
address = 'Riedhofschule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeR = location.latitude
longitudeR = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeR, longitudeR))

The geograpical coordinate are 50.0932282, 8.673617773889475.


In [104]:
address = 'Schillerschule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeS = location.latitude
longitudeS = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeS, longitudeS))

The geograpical coordinate are 50.10189015, 8.677255152633979.


In [105]:
address = 'Carl-Schurz-Schule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeC = location.latitude
longitudeC = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeC, longitudeC))

The geograpical coordinate are 50.10095535, 8.676494314145728.


In [106]:
address = 'Martin-Buber-Schule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeMB = location.latitude
longitudeMB = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeMB, longitudeMB))

The geograpical coordinate are 50.086167200000006, 8.68122038206014.


In [107]:
address = 'Gruneliusschule,Frankfurt am Main'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitudeG = location.latitude
longitudeG = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitudeG, longitudeG))

The geograpical coordinate are 50.0977959, 8.726380840141971.


In [115]:
map_FRA_SN = folium.Map(location=[latitude, longitude], zoom_start=10)

folium.GeoJson(df_distCity['geometry'],style_function=style_fcn).add_to(map_FRA_SN)
folium.Circle(location=[latitudeB, longitudeB], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)    
folium.Circle(location=[latitudeD, longitudeD], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
folium.Circle(location=[latitudeF, longitudeF], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
folium.Circle(location=[latitudeM, longitudeM], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
folium.Circle(location=[latitudeT, longitudeT], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
folium.Circle(location=[latitudeW, longitudeW], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
folium.Circle(location=[latitudeH, longitudeH], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
folium.Circle(location=[latitudeI, longitudeI], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
folium.Circle(location=[latitudeR, longitudeR], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
folium.Circle(location=[latitudeS, longitudeS], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
folium.Circle(location=[latitudeC, longitudeC], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
#folium.Circle(location=[latitudeMB, longitudeMB], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)
#folium.Circle(location=[latitudeG, longitudeG], radius=250, popup='city', color="#008080").add_to(map_FRA_SN)

map_FRA_SN