## Analyzing the Neighborhoods/ Localities within Boroughs in the city of Berlin

##### Import Libraries

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    altair-2.2.2               |           py35_1         462 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.0 MB

The following NEW packages will

##### Get Borough data of the city of Berlin

In [10]:
dfs = pd.read_html('https://en.wikipedia.org/wiki/Boroughs_and_neighborhoods_of_Berlin', header=0)


##### Get 12 boroughs of Berlin city - Get maximum of 3 localities/ neighborhoods for each borough that has the highest population densities 

In [11]:
b = ['Mitte', 'Friedrichshain-Kreuzberg', ' Pankow', 'Charlottenburg-Wilmersdorf', ' Spandau', ' Steglitz-Zehlendorf', 'Tempelhof-Schöneberg', 'Neukölln', 'Treptow-Köpenick', 'Marzahn-Hellersdorf', 'Lichtenberg', ' Reinickendorf']
#print (len(b))

df_borough = pd.DataFrame(columns = ['Locality', 'Borough'])
count = 0    
for index,df in enumerate(dfs):
    try:
        if index == 0 or index == 1:
            pass
        else:
            df.sort_values('Density inhabitants per km²', axis=0, ascending=False, inplace=True)
            df = df.reset_index(drop=True)
            #df1
            df = df.loc[0:2,['Locality']]
            df['Borough'] = b[count]
            #print (df)
            df_borough = df_borough.append(df)
            #df_borough
            count = count+1
    except:
        pass
df_borough = (df_borough[['Borough', 'Locality']]).reset_index(drop=True)
df_borough

Unnamed: 0,Borough,Locality
0,Mitte,(0106) Gesundbrunnen
1,Mitte,(0103) Hansaviertel
2,Mitte,(0102) Moabit
3,Friedrichshain-Kreuzberg,(0202) Kreuzberg
4,Friedrichshain-Kreuzberg,(0201) Friedrichshain
5,Pankow,(0301) Prenzlauer Berg
6,Pankow,(0307) Pankow
7,Pankow,(0302) Weißensee
8,Charlottenburg-Wilmersdorf,(0402) Wilmersdorf
9,Charlottenburg-Wilmersdorf,(0401) Charlottenburg


In [8]:
!conda install -c conda-forge geocoder --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    orderedset-2.0             |           py35_0         685 KB  conda-forge
    geocoder-1.38.1            |             py_0          52 KB  conda-forge
    ratelim-0.1.6              |           py35_0           5 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         742 KB

The following NEW packages will be INSTALLED:

    geocoder:   1.38.1-py_0  conda-forge
    orderedset: 2.0-py35_0   conda-forge
    ratelim:    0.1.6-py35_0 conda-forge


Downloading and Extracting Packages
orderedset-2.0       | 685 KB    | ##################################### | 100% 
geocoder-1.38.1      | 52 KB     | #######################

##### Clean df_borough dataframe - Get postal code of each locality to a seperate column

In [12]:
for index, row in enumerate(df_borough.Locality):
    #print (row)
    row = row.split(")")
    l1 = row[0].split("(")
    #print (l1)
    df_borough.loc[index , 'Postal_code'] = l1[1]
    df_borough.loc[index, 'Locality'] = row[1]

df_borough = (df_borough[['Postal_code', 'Borough', 'Locality']]).reset_index(drop=True)
df_borough    


Unnamed: 0,Postal_code,Borough,Locality
0,106,Mitte,Gesundbrunnen
1,103,Mitte,Hansaviertel
2,102,Mitte,Moabit
3,202,Friedrichshain-Kreuzberg,Kreuzberg
4,201,Friedrichshain-Kreuzberg,Friedrichshain
5,301,Pankow,Prenzlauer Berg
6,307,Pankow,Pankow
7,302,Pankow,Weißensee
8,402,Charlottenburg-Wilmersdorf,Wilmersdorf
9,401,Charlottenburg-Wilmersdorf,Charlottenburg


##### Obtain geographical coordinates per locality

In [59]:
add_list = []
for index, rows in df_borough.iterrows(): 
    locality = rows.Locality
    borough = rows.Borough
    address = (locality + ',' + borough + ',' + 'Berlin' + ',' + 'Germany')
    add_list.append(address)
    #print (add)
for ind, add in enumerate(add_list):
    #print (ind, add)
    geolocator = Nominatim (user_agent = 'Berlin_explorer') #define a user-agent
    location = geolocator.geocode(add)
    latitude = location.latitude
    longitude = location.longitude
    #print (latitude)
    #print (longitude) 
    df_borough.loc[ind, 'Latitude'] = latitude
    df_borough.loc[ind, 'Longitude'] = longitude
df_borough.head()

Unnamed: 0,Postal_code,Borough,Locality,Latitude,Longitude
0,106,Mitte,Gesundbrunnen,52.55092,13.384846
1,103,Mitte,Hansaviertel,52.519123,13.341872
2,102,Mitte,Moabit,52.530102,13.342542
3,202,Friedrichshain-Kreuzberg,Kreuzberg,52.497644,13.411914
4,201,Friedrichshain-Kreuzberg,Friedrichshain,52.512215,13.45029


##### Map of Berlin with highest dense localities per each of the 12 boroughs

In [56]:
address = 'Berlin, Germany'

geolocator = Nominatim(user_agent="Berlin_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Berlin City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Berlin City are 52.5170365, 13.3888599.


In [58]:
# create map of Berlin using latitude and longitude values
map_berlin = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_borough['Latitude'], df_borough['Longitude'], df_borough['Borough'], df_borough['Locality']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_berlin)  
    
map_berlin

##### Define Foursquare credentials

In [14]:
#Define Foursquare credentials and version
CLIENT_ID = 'WGX0SR5XECNKWUG4MNHZ3ARPZM11HNONEAHD0DHMHQ1APSLY' # your Foursquare ID
CLIENT_SECRET = 'QEO4BTONLV1KMGBEJEIMG1KPTZFFHACT5WCRUI4VSSDSPRNV' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentails:
CLIENT_ID: WGX0SR5XECNKWUG4MNHZ3ARPZM11HNONEAHD0DHMHQ1APSLY
CLIENT_SECRET:QEO4BTONLV1KMGBEJEIMG1KPTZFFHACT5WCRUI4VSSDSPRNV


In [15]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [16]:
#function to repeat the same process to all the neighborhoods in a borough of Berlin
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Locality', 
                  'Locality Latitude', 
                  'Locality Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Analyzing Boroughs of Berlin city

#### Analyzing Borough 1 - Mitte

In [17]:
#Extract Mitte data to a seperate DF
Mitte_data = df_borough[df_borough['Borough'] == 'Mitte'].reset_index(drop=True)
Mitte_data.head()

Unnamed: 0,Postal_code,Borough,Locality,Latitude,Longitude
0,106,Mitte,Gesundbrunnen,52.55092,13.384846
1,103,Mitte,Hansaviertel,52.519123,13.341872
2,102,Mitte,Moabit,52.530102,13.342542


In [24]:
#code to run the above 'getNearbyVenues' function on each neighborhood and create a new dataframe per borough with venue data
limit=100

Mitte_venues = getNearbyVenues(names=Mitte_data['Locality'],
                                   latitudes=Mitte_data['Latitude'],
                                   longitudes=Mitte_data['Longitude']
                                  )
Mitte_venues.head()

 Gesundbrunnen
 Hansaviertel
 Moabit


Unnamed: 0,Locality,Locality Latitude,Locality Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Gesundbrunnen,52.55092,13.384846,La Femme,52.550107,13.385304,Breakfast Spot
1,Gesundbrunnen,52.55092,13.384846,Il Milanese del Tacco,52.551772,13.384107,Gourmet Shop
2,Gesundbrunnen,52.55092,13.384846,F-Bar,52.551908,13.386493,Dive Bar
3,Gesundbrunnen,52.55092,13.384846,MOXY Berlin Humboldthain Park,52.549568,13.384071,Hotel
4,Gesundbrunnen,52.55092,13.384846,Curry Baude,52.54942,13.386999,Currywurst Joint


In [19]:
#Analyze each locality
#one hot encoding
Mitte_onehot = pd.get_dummies(Mitte_venues[['Venue Category']], prefix="", prefix_sep="")

# add Locality/ neighborhood column back to dataframe
Mitte_onehot['Locality'] = Mitte_venues['Locality'] 

# move neighborhood column to the first column
fixed_columns = [Mitte_onehot.columns[-1]] + list(Mitte_onehot.columns[:-1])
Mitte_onehot = Mitte_onehot[fixed_columns]

Mitte_onehot.head()

Unnamed: 0,Locality,Art Museum,Asian Restaurant,Austrian Restaurant,BBQ Joint,Bakery,Bar,Beer Bar,Bistro,Boat or Ferry,Bookstore,Breakfast Spot,Burger Joint,Bus Stop,Café,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Convenience Store,Currywurst Joint,Dive Bar,Doner Restaurant,Drugstore,Electronics Store,Falafel Restaurant,Fast Food Restaurant,Fish Market,Food Court,French Restaurant,Garden,Gas Station,German Restaurant,Gourmet Shop,Grocery Store,Gym / Fitness Center,Halal Restaurant,Historic Site,History Museum,Hostel,Hotel,IT Services,Ice Cream Shop,Irish Pub,Italian Restaurant,Kebab Restaurant,Light Rail Station,Liquor Store,Mediterranean Restaurant,Metro Station,Mobile Phone Shop,Organic Grocery,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Pharmacy,Pide Place,Pier,Platform,Playground,Plaza,Pub,Rental Car Location,Restaurant,River,Seafood Restaurant,Shopping Mall,Sporting Goods Shop,Supermarket,Syrian Restaurant,Taverna,Theater,Thrift / Vintage Store,Trail,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Gesundbrunnen,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Gesundbrunnen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Gesundbrunnen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Gesundbrunnen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Gesundbrunnen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [47]:
Mitte_grouped = Mitte_onehot.groupby('Locality').mean().reset_index()
Mitte_grouped

Unnamed: 0,Locality,Art Museum,Asian Restaurant,Austrian Restaurant,BBQ Joint,Bakery,Bar,Beer Bar,Bistro,Boat or Ferry,Bookstore,Breakfast Spot,Burger Joint,Bus Stop,Café,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Convenience Store,Currywurst Joint,Dive Bar,Doner Restaurant,Drugstore,Electronics Store,Falafel Restaurant,Fast Food Restaurant,Fish Market,Food Court,French Restaurant,Garden,Gas Station,German Restaurant,Gourmet Shop,Grocery Store,Gym / Fitness Center,Halal Restaurant,Historic Site,History Museum,Hostel,Hotel,IT Services,Ice Cream Shop,Irish Pub,Italian Restaurant,Kebab Restaurant,Light Rail Station,Liquor Store,Mediterranean Restaurant,Metro Station,Mobile Phone Shop,Organic Grocery,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Pharmacy,Pide Place,Pier,Platform,Playground,Plaza,Pub,Rental Car Location,Restaurant,River,Seafood Restaurant,Shopping Mall,Sporting Goods Shop,Supermarket,Syrian Restaurant,Taverna,Theater,Thrift / Vintage Store,Trail,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Gesundbrunnen,0.0,0.0,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.042553,0.021277,0.0,0.0,0.021277,0.021277,0.021277,0.021277,0.0,0.0,0.021277,0.021277,0.021277,0.021277,0.06383,0.0,0.021277,0.0,0.0,0.0,0.0,0.021277,0.021277,0.0,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.042553,0.0,0.021277,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.021277,0.021277,0.021277,0.021277,0.0,0.0,0.0,0.0,0.021277,0.0,0.06383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,0.021277,0.0,0.0,0.0,0.042553,0.0,0.06383,0.0,0.0
1,Hansaviertel,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.03125,0.03125,0.0,0.0,0.0,0.03125,0.09375,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.03125,0.0,0.03125,0.03125,0.03125,0.03125,0.03125,0.0,0.0,0.0,0.0,0.03125,0.03125,0.0625,0.0,0.0,0.03125,0.0,0.03125,0.03125,0.0,0.03125,0.0,0.03125,0.0,0.0,0.03125,0.03125,0.0,0.0,0.03125,0.0,0.0,0.0,0.03125,0.0,0.0
2,Moabit,0.0,0.014706,0.014706,0.014706,0.0,0.044118,0.014706,0.0,0.0,0.014706,0.029412,0.044118,0.0,0.058824,0.0,0.0,0.014706,0.029412,0.014706,0.0,0.0,0.0,0.044118,0.029412,0.014706,0.014706,0.014706,0.014706,0.014706,0.014706,0.0,0.0,0.044118,0.0,0.014706,0.029412,0.0,0.0,0.0,0.044118,0.044118,0.014706,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.014706,0.014706,0.014706,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.014706,0.0,0.014706,0.029412,0.0,0.058824,0.0,0.014706,0.0,0.014706,0.0,0.014706,0.014706,0.029412,0.014706


In [30]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [22]:
#display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Locality']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
locality_venues_sorted = pd.DataFrame(columns=columns)
locality_venues_sorted['Locality'] = Mitte_grouped['Locality']

for ind in np.arange(Mitte_grouped.shape[0]):
    locality_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Mitte_grouped.iloc[ind, :], num_top_venues)

locality_venues_sorted.head()



Unnamed: 0,Locality,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Gesundbrunnen,Turkish Restaurant,Platform,Drugstore,Hotel,Trail,Supermarket,Bar,Italian Restaurant,Bookstore,Ice Cream Shop
1,Hansaviertel,Café,Art Museum,Pedestrian Plaza,Bakery,Liquor Store,Bus Stop,Pastry Shop,Metro Station,Mediterranean Restaurant,Light Rail Station
2,Moabit,Café,Supermarket,Hostel,German Restaurant,Doner Restaurant,Burger Joint,Bar,Hotel,Breakfast Spot,Gym / Fitness Center


##### Analyzing all 5 localities of borough 'Mitte'

In [44]:
#Get 'Mitte' localities in to a DF
df_mitte_all = dfs[2]
df_mitte_all = df_mitte_all[['Locality']].reset_index(drop=True)

#Extract postal_code to a seperate column
for index, row in enumerate(df_mitte_all.Locality):
    row = row.split(")")
    l1 = row[0].split("(")
    df_mitte_all.loc[index , 'Postal_code'] = l1[1]
    df_mitte_all.loc[index, 'Locality'] = row[1]
df_mitte_all['Borough'] = 'Mitte'
df_mitte_all = (df_mitte_all[['Postal_code', 'Borough', 'Locality']]).reset_index(drop=True)
#df_mitte_all

#Get location coordinates
add_list = []
for index, rows in df_mitte_all.iterrows(): 
    locality = rows.Locality
    borough = rows.Borough
    address = (locality + ',' + borough + ',' + 'Berlin' + ',' + 'Germany')
    add_list.append(address)
    #print (add)
for ind, add in enumerate(add_list):
    geolocator = Nominatim (user_agent = 'Berlin_explorer') #define a user-agent
    location = geolocator.geocode(add)
    latitude = location.latitude
    longitude = location.longitude
     
    df_mitte_all.loc[ind, 'Latitude'] = latitude
    df_mitte_all.loc[ind, 'Longitude'] = longitude
#df_mitte_all

#code to run the above 'getNearbyVenues' function on each neighborhood and create a new dataframe for 'Mitte' with venue data
limit=100

df_mitte_all_venues = getNearbyVenues(names=df_mitte_all['Locality'],
                                   latitudes=df_mitte_all['Latitude'],
                                   longitudes=df_mitte_all['Longitude']
                                  )
#df_mitte_all_venues.head()



 Gesundbrunnen
 Hansaviertel
 Moabit
 Wedding
 Mitte
 Tiergarten


Unnamed: 0,Locality,Locality Latitude,Locality Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Gesundbrunnen,52.55092,13.384846,La Femme,52.550107,13.385304,Breakfast Spot
1,Gesundbrunnen,52.55092,13.384846,Il Milanese del Tacco,52.551772,13.384107,Gourmet Shop
2,Gesundbrunnen,52.55092,13.384846,F-Bar,52.551908,13.386493,Dive Bar
3,Gesundbrunnen,52.55092,13.384846,MOXY Berlin Humboldthain Park,52.549568,13.384071,Hotel
4,Gesundbrunnen,52.55092,13.384846,Curry Baude,52.54942,13.386999,Currywurst Joint


In [45]:
#Analyze each locality
#one hot encoding
df_mitte_all_onehot = pd.get_dummies(df_mitte_all_venues[['Venue Category']], prefix="", prefix_sep="")

# add Locality/ neighborhood column back to dataframe
df_mitte_all_onehot['Locality'] = df_mitte_all_venues['Locality'] 

# move neighborhood column to the first column
fixed_columns = [df_mitte_all_onehot.columns[-1]] + list(df_mitte_all_onehot.columns[:-1])
df_mitte_all_onehot = df_mitte_all_onehot[fixed_columns]

df_mitte_all_onehot.head()

Unnamed: 0,Locality,Art Gallery,Art Museum,Asian Restaurant,Austrian Restaurant,BBQ Joint,Bakery,Bar,Beer Bar,Big Box Store,Bistro,Board Shop,Boat or Ferry,Bookstore,Breakfast Spot,Brewery,Burger Joint,Bus Stop,Café,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Costume Shop,Cupcake Shop,Currywurst Joint,Dive Bar,Doner Restaurant,Drugstore,Electronics Store,Event Space,Exhibit,Falafel Restaurant,Fast Food Restaurant,Fish Market,Food & Drink Shop,Food Court,Fountain,French Restaurant,Garden,Gas Station,German Restaurant,Gift Shop,Gourmet Shop,Grocery Store,Gym / Fitness Center,Halal Restaurant,Historic Site,History Museum,Hostel,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Inn,Irish Pub,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Light Rail Station,Liquor Store,Lounge,Mediterranean Restaurant,Memorial Site,Metro Station,Mobile Phone Shop,Museum,Neighborhood,Nightclub,Organic Grocery,Outdoor Sculpture,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Pharmacy,Pide Place,Pier,Platform,Playground,Plaza,Pub,Rental Car Location,Restaurant,River,Salad Place,Scandinavian Restaurant,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Sporting Goods Shop,Steakhouse,Supermarket,Syrian Restaurant,Taverna,Tennis Court,Theater,Thrift / Vintage Store,Trail,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Gesundbrunnen,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Gesundbrunnen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Gesundbrunnen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Gesundbrunnen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Gesundbrunnen,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [46]:
df_mitte_all_grouped = df_mitte_all_onehot.groupby('Locality').mean().reset_index()
df_mitte_all_grouped.head()

Unnamed: 0,Locality,Art Gallery,Art Museum,Asian Restaurant,Austrian Restaurant,BBQ Joint,Bakery,Bar,Beer Bar,Big Box Store,Bistro,Board Shop,Boat or Ferry,Bookstore,Breakfast Spot,Brewery,Burger Joint,Bus Stop,Café,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Costume Shop,Cupcake Shop,Currywurst Joint,Dive Bar,Doner Restaurant,Drugstore,Electronics Store,Event Space,Exhibit,Falafel Restaurant,Fast Food Restaurant,Fish Market,Food & Drink Shop,Food Court,Fountain,French Restaurant,Garden,Gas Station,German Restaurant,Gift Shop,Gourmet Shop,Grocery Store,Gym / Fitness Center,Halal Restaurant,Historic Site,History Museum,Hostel,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Inn,Irish Pub,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Light Rail Station,Liquor Store,Lounge,Mediterranean Restaurant,Memorial Site,Metro Station,Mobile Phone Shop,Museum,Neighborhood,Nightclub,Organic Grocery,Outdoor Sculpture,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Pharmacy,Pide Place,Pier,Platform,Playground,Plaza,Pub,Rental Car Location,Restaurant,River,Salad Place,Scandinavian Restaurant,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shopping Mall,Sporting Goods Shop,Steakhouse,Supermarket,Syrian Restaurant,Taverna,Tennis Court,Theater,Thrift / Vintage Store,Trail,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Gesundbrunnen,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.042553,0.021277,0.0,0.0,0.0,0.021277,0.021277,0.021277,0.021277,0.0,0.0,0.0,0.021277,0.0,0.0,0.021277,0.021277,0.021277,0.06383,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.021277,0.0,0.0,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.042553,0.0,0.0,0.021277,0.0,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.021277,0.0,0.021277,0.021277,0.0,0.0,0.0,0.0,0.021277,0.0,0.06383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,0.021277,0.0,0.0,0.0,0.0,0.042553,0.0,0.06383,0.0,0.0
1,Hansaviertel,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.03125,0.0,0.03125,0.0,0.0,0.0,0.0,0.03125,0.09375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.03125,0.03125,0.03125,0.0,0.03125,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.03125,0.0625,0.0,0.0,0.03125,0.0,0.03125,0.03125,0.0,0.03125,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.03125,0.0,0.0
2,Mitte,0.035088,0.035088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.017544,0.017544,0.017544,0.0,0.0,0.070175,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.035088,0.0,0.017544,0.0,0.105263,0.017544,0.0,0.0,0.0,0.0,0.017544,0.070175,0.0,0.070175,0.017544,0.0,0.017544,0.017544,0.0,0.017544,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.070175,0.017544,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.017544,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.017544,0.017544
3,Moabit,0.0,0.0,0.014706,0.014706,0.014706,0.0,0.044118,0.014706,0.0,0.0,0.0,0.0,0.014706,0.029412,0.0,0.044118,0.0,0.058824,0.0,0.0,0.014706,0.029412,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.044118,0.029412,0.014706,0.0,0.0,0.014706,0.014706,0.014706,0.0,0.014706,0.0,0.014706,0.0,0.0,0.044118,0.0,0.0,0.014706,0.029412,0.0,0.0,0.0,0.044118,0.044118,0.0,0.014706,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.014706,0.014706,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.014706,0.029412,0.0,0.0,0.058824,0.0,0.014706,0.0,0.0,0.014706,0.0,0.014706,0.014706,0.029412,0.014706
4,Tiergarten,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [61]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Locality']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
venues_sorted = pd.DataFrame(columns=columns)
venues_sorted['Locality'] = df_mitte_all_grouped['Locality']

for ind in np.arange(df_mitte_all_grouped.shape[0]):
    venues_sorted.iloc[ind, 1:] = return_most_common_venues(df_mitte_all_grouped.iloc[ind, :], num_top_venues)

venues_sorted.head()

Unnamed: 0,Locality,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Gesundbrunnen,Turkish Restaurant,Platform,Drugstore,Hotel,Trail,Supermarket,Bar,Bookstore,Italian Restaurant,Falafel Restaurant
1,Hansaviertel,Café,Pedestrian Plaza,Art Museum,Bakery,Park,Rental Car Location,Plaza,Playground,Bus Stop,Pier
2,Mitte,German Restaurant,Museum,Café,Hotel,History Museum,Art Gallery,Art Museum,Concert Hall,Fountain,Historic Site
3,Moabit,Café,Supermarket,Hostel,Burger Joint,Doner Restaurant,Bar,Hotel,German Restaurant,Drugstore,Gym / Fitness Center
4,Tiergarten,Lounge,Hotel Bar,Garden,Historic Site,Café,Memorial Site,Scandinavian Restaurant,Park,Breakfast Spot,Sculpture Garden


#### Cluster localities in borough - 'Mitte'

In [49]:
# set number of clusters
kclusters = 2

mitte_grouped_clustering = df_mitte_all_grouped.drop('Locality', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(mitte_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 1, 0], dtype=int32)

In [53]:
# add clustering labels
#venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

df_mitte_all_merged = df_mitte_all

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
df_mitte_all_merged = df_mitte_all_merged.join(venues_sorted.set_index('Locality'), on='Locality')

df_mitte_all_merged.head() # check the last columns!

Unnamed: 0,Postal_code,Borough,Locality,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,106,Mitte,Gesundbrunnen,52.55092,13.384846,1,Turkish Restaurant,Platform,Drugstore,Hotel,Trail,Supermarket,Bar,Bookstore,Italian Restaurant,Falafel Restaurant
1,103,Mitte,Hansaviertel,52.519123,13.341872,1,Café,Pedestrian Plaza,Art Museum,Bakery,Park,Rental Car Location,Plaza,Playground,Bus Stop,Pier
2,102,Mitte,Moabit,52.530102,13.342542,1,Café,Supermarket,Hostel,Burger Joint,Doner Restaurant,Bar,Hotel,German Restaurant,Drugstore,Gym / Fitness Center
3,105,Mitte,Wedding,52.550123,13.34197,0,Supermarket,Pharmacy,Tennis Court,Park,Gas Station,Big Box Store,Bar,Food & Drink Shop,Bakery,Ice Cream Shop
4,101,Mitte,Mitte,52.51769,13.402376,1,German Restaurant,Museum,Café,Hotel,History Museum,Art Gallery,Art Museum,Concert Hall,Fountain,Historic Site


#### Examine clusters

In [63]:
df_mitte_all_merged.loc[df_mitte_all_merged['Cluster Labels'] == 0, df_mitte_all_merged.columns[[1] + list(range(2, df_mitte_all_merged.shape[1]))]]

Unnamed: 0,Borough,Locality,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Mitte,Wedding,52.550123,13.34197,0,Supermarket,Pharmacy,Tennis Court,Park,Gas Station,Big Box Store,Bar,Food & Drink Shop,Bakery,Ice Cream Shop


In [62]:
df_mitte_all_merged.loc[df_mitte_all_merged['Cluster Labels'] == 1, df_mitte_all_merged.columns[[1] + list(range(2, df_mitte_all_merged.shape[1]))]]

Unnamed: 0,Borough,Locality,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Mitte,Gesundbrunnen,52.55092,13.384846,1,Turkish Restaurant,Platform,Drugstore,Hotel,Trail,Supermarket,Bar,Bookstore,Italian Restaurant,Falafel Restaurant
1,Mitte,Hansaviertel,52.519123,13.341872,1,Café,Pedestrian Plaza,Art Museum,Bakery,Park,Rental Car Location,Plaza,Playground,Bus Stop,Pier
2,Mitte,Moabit,52.530102,13.342542,1,Café,Supermarket,Hostel,Burger Joint,Doner Restaurant,Bar,Hotel,German Restaurant,Drugstore,Gym / Fitness Center
4,Mitte,Mitte,52.51769,13.402376,1,German Restaurant,Museum,Café,Hotel,History Museum,Art Gallery,Art Museum,Concert Hall,Fountain,Historic Site
5,Mitte,Tiergarten,52.509778,13.35726,1,Lounge,Hotel Bar,Garden,Historic Site,Café,Memorial Site,Scandinavian Restaurant,Park,Breakfast Spot,Sculpture Garden


#### Analyzing Borough 2 - Friedrichshain-Kreuzberg

In [26]:
#Extract Friedrichshain-Kreuzberg data to a seperate DF
FK_data = df_borough[df_borough['Borough'] == 'Friedrichshain-Kreuzberg'].reset_index(drop=True)
FK_data.head()

Unnamed: 0,Postal_code,Borough,Locality,Latitude,Longitude
0,202,Friedrichshain-Kreuzberg,Kreuzberg,52.497644,13.411914
1,201,Friedrichshain-Kreuzberg,Friedrichshain,52.512215,13.45029


In [27]:
#code to run the above 'getNearbyVenues' function on each neighborhood and create a new dataframe per borough with venue data
limit=100

FK_venues = getNearbyVenues(names=FK_data['Locality'],
                                   latitudes=Mitte_data['Latitude'],
                                   longitudes=Mitte_data['Longitude']
                                  )
FK_venues.head()

 Kreuzberg
 Friedrichshain


Unnamed: 0,Locality,Locality Latitude,Locality Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Kreuzberg,52.55092,13.384846,La Femme,52.550107,13.385304,Breakfast Spot
1,Kreuzberg,52.55092,13.384846,Il Milanese del Tacco,52.551772,13.384107,Gourmet Shop
2,Kreuzberg,52.55092,13.384846,F-Bar,52.551908,13.386493,Dive Bar
3,Kreuzberg,52.55092,13.384846,MOXY Berlin Humboldthain Park,52.549568,13.384071,Hotel
4,Kreuzberg,52.55092,13.384846,Curry Baude,52.54942,13.386999,Currywurst Joint


In [28]:
#Analyze each locality - FK
#one hot encoding
FK_onehot = pd.get_dummies(FK_venues[['Venue Category']], prefix="", prefix_sep="")

# add Locality/ neighborhood column back to dataframe
FK_onehot['Locality'] = FK_venues['Locality'] 

# move neighborhood column to the first column
fixed_columns = [FK_onehot.columns[-1]] + list(FK_onehot.columns[:-1])
FK_onehot = FK_onehot[fixed_columns]

FK_onehot.head()

Unnamed: 0,Locality,Art Museum,Bakery,Bar,Bistro,Boat or Ferry,Bookstore,Breakfast Spot,Bus Stop,Café,Chocolate Shop,Climbing Gym,Clothing Store,Convenience Store,Currywurst Joint,Dive Bar,Doner Restaurant,Drugstore,Falafel Restaurant,Garden,Gas Station,Gourmet Shop,Grocery Store,Gym / Fitness Center,Halal Restaurant,Historic Site,History Museum,Hostel,Hotel,Ice Cream Shop,Irish Pub,Italian Restaurant,Kebab Restaurant,Light Rail Station,Liquor Store,Mediterranean Restaurant,Metro Station,Mobile Phone Shop,Organic Grocery,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Pide Place,Pier,Platform,Playground,Plaza,Rental Car Location,River,Sporting Goods Shop,Supermarket,Syrian Restaurant,Theater,Trail,Turkish Restaurant
0,Kreuzberg,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Kreuzberg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Kreuzberg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Kreuzberg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Kreuzberg,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [29]:
FK_grouped = FK_onehot.groupby('Locality').mean().reset_index()
FK_grouped.head()

Unnamed: 0,Locality,Art Museum,Bakery,Bar,Bistro,Boat or Ferry,Bookstore,Breakfast Spot,Bus Stop,Café,Chocolate Shop,Climbing Gym,Clothing Store,Convenience Store,Currywurst Joint,Dive Bar,Doner Restaurant,Drugstore,Falafel Restaurant,Garden,Gas Station,Gourmet Shop,Grocery Store,Gym / Fitness Center,Halal Restaurant,Historic Site,History Museum,Hostel,Hotel,Ice Cream Shop,Irish Pub,Italian Restaurant,Kebab Restaurant,Light Rail Station,Liquor Store,Mediterranean Restaurant,Metro Station,Mobile Phone Shop,Organic Grocery,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Pide Place,Pier,Platform,Playground,Plaza,Rental Car Location,River,Sporting Goods Shop,Supermarket,Syrian Restaurant,Theater,Trail,Turkish Restaurant
0,Friedrichshain,0.0625,0.0625,0.0,0.03125,0.03125,0.0,0.0,0.03125,0.09375,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,0.0,0.03125,0.03125,0.03125,0.03125,0.03125,0.0,0.0,0.0,0.0,0.03125,0.03125,0.0625,0.0,0.03125,0.0,0.03125,0.03125,0.03125,0.03125,0.03125,0.03125,0.0,0.03125,0.0,0.03125
1,Kreuzberg,0.0,0.0,0.042553,0.0,0.0,0.042553,0.021277,0.0,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.06383,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.021277,0.042553,0.021277,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.021277,0.021277,0.021277,0.021277,0.0,0.0,0.0,0.021277,0.0,0.06383,0.0,0.0,0.0,0.0,0.0,0.042553,0.021277,0.0,0.042553,0.06383


In [32]:
#display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Locality']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
locality_venues_sorted = pd.DataFrame(columns=columns)
locality_venues_sorted['Locality'] = FK_grouped['Locality']

for ind in np.arange(FK_grouped.shape[0]):
    locality_venues_sorted.iloc[ind, 1:] = return_most_common_venues(FK_grouped.iloc[ind, :], num_top_venues)

locality_venues_sorted.head()

Unnamed: 0,Locality,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Friedrichshain,Café,Art Museum,Bakery,Pedestrian Plaza,Metro Station,Bistro,Boat or Ferry,Bus Stop,Currywurst Joint,Gym / Fitness Center
1,Kreuzberg,Turkish Restaurant,Platform,Drugstore,Bar,Bookstore,Trail,Italian Restaurant,Hotel,Supermarket,Doner Restaurant
