# Final Data Report file for Seoul Neighborhood Analysis

* Build dataframe of neighborhoods in Seoul
* Get geo coordinates of each neighborhood
* Get data of venues in neighborhoods from Foursquare API
* Cluster neighborhoods using KNN to determine best neighborhood

## Import Libraries

In [2]:
import numpy as np 

import pandas as pd 
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json 

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

import requests 
!pip install beautifulsoup4
from bs4 import BeautifulSoup 

from pandas.io.json import json_normalize 

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

print("Libraries imported.")

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


## Scrape data from Wikipedia

In [3]:
data = requests.get("https://en.wikipedia.org/wiki/Category:Districts_of_Seoul").text

In [4]:
soup = BeautifulSoup(data, 'html.parser')

In [5]:
neighborhoods = []

In [6]:
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoods.append(row.text)

In [7]:
seoul = pd.DataFrame({"Neighborhood": neighborhoods})

seoul.head()

Unnamed: 0,Neighborhood
0,► Transport in Seoul by district‎ (23 C)
1,► Buildings and structures in Seoul by distri...
2,► Geography of Seoul by district‎ (25 C)
3,"► Dobong District‎ (3 C, 4 P)"
4,"► Dongdaemun District‎ (3 C, 6 P)"


Get rid of unnecessary rows

In [8]:
seoul = seoul.iloc[3:]
seoul.head()

Unnamed: 0,Neighborhood
3,"► Dobong District‎ (3 C, 4 P)"
4,"► Dongdaemun District‎ (3 C, 6 P)"
5,"► Dongjak District‎ (3 C, 4 P)"
6,"► Eunpyeong District‎ (2 C, 2 P)"
7,"► Gangbuk District‎ (3 C, 1 P)"


In [9]:
seoul.shape

(26, 1)

## Get geo coordinates

In [10]:
pip install geocoder

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 6.6MB/s ta 0:00:011
[?25hCollecting click (from geocoder)
[?25l  Downloading https://files.pythonhosted.org/packages/d2/3d/fa76db83bf75c4f8d338c2fd15c8d33fdd7ad23a9b5e57eb6c5de26b430e/click-7.1.2-py2.py3-none-any.whl (82kB)
[K     |████████████████████████████████| 92kB 14.9MB/s eta 0:00:01
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Collecting future (from geocoder)
[?25l  Downloading https://files.pythonhosted.org/packages/45/0b/38b06fd9b92dc2b68d58b75f900e97884c45bedd2ff83203d933cf5851c9/future-0.18.2.tar.gz (829kB)
[K     |████████████████████████████████| 829kB 23.7MB/s eta 0:00:01
Building wheel

In [11]:
import geocoder

# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Seoul, South Korea'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords


In [12]:
coords = [ get_latlng(neighborhood) for neighborhood in seoul["Neighborhood"].tolist() ]

In [13]:
coords

[[37.568260000000066, 126.97783000000004],
 [37.581890000000044, 127.05408000000011],
 [37.500560000000064, 126.95149000000004],
 [37.61846000000003, 126.92780000000005],
 [37.509089986158166, 127.01226885653118],
 [37.55039000000005, 127.14546000000007],
 [37.495100000000036, 127.06278000000009],
 [37.568260000000066, 126.97783000000004],
 [37.474860000000035, 126.89106000000004],
 [37.568260000000066, 126.97783000000004],
 [37.568260000000066, 126.97783000000004],
 [37.53913000000006, 127.08366000000001],
 [37.568260000000066, 126.97783000000004],
 [37.568260000000066, 126.97783000000004],
 [37.60199000000006, 127.1046100000001],
 [37.568260000000066, 126.97783000000004],
 [37.568260000000066, 126.97783000000004],
 [37.49056000000007, 127.0200000000001],
 [37.568260000000066, 126.97783000000004],
 [37.61505000000005, 127.02496000000008],
 [37.547840000000065, 127.02461000000005],
 [37.568260000000066, 126.97783000000004],
 [37.53532000000007, 126.84334000000001],
 [37.568260000000066

In [14]:
seoul_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
seoul_coords.head()

Unnamed: 0,Latitude,Longitude
0,37.56826,126.97783
1,37.58189,127.05408
2,37.50056,126.95149
3,37.61846,126.9278
4,37.50909,127.012269


In [15]:
seoul['Latitude'] = seoul_coords['Latitude']
seoul['Longitude'] = seoul_coords['Longitude']
seoul

Unnamed: 0,Neighborhood,Latitude,Longitude
3,"► Dobong District‎ (3 C, 4 P)",37.61846,126.9278
4,"► Dongdaemun District‎ (3 C, 6 P)",37.50909,127.012269
5,"► Dongjak District‎ (3 C, 4 P)",37.55039,127.14546
6,"► Eunpyeong District‎ (2 C, 2 P)",37.4951,127.06278
7,"► Gangbuk District‎ (3 C, 1 P)",37.56826,126.97783
8,"► Gangdong District‎ (3 C, 5 P)",37.47486,126.89106
9,"► Gangnam District‎ (3 C, 17 P)",37.56826,126.97783
10,"► Gangseo District, Seoul‎ (3 C, 2 P)",37.56826,126.97783
11,"► Geumcheon District‎ (3 C, 4 P)",37.53913,127.08366
12,"► Guro District, Seoul‎ (3 C, 8 P)",37.56826,126.97783


Drop rows with NaN values

In [16]:
seoul = seoul[seoul['Latitude'].notna()]
seoul

Unnamed: 0,Neighborhood,Latitude,Longitude
3,"► Dobong District‎ (3 C, 4 P)",37.61846,126.9278
4,"► Dongdaemun District‎ (3 C, 6 P)",37.50909,127.012269
5,"► Dongjak District‎ (3 C, 4 P)",37.55039,127.14546
6,"► Eunpyeong District‎ (2 C, 2 P)",37.4951,127.06278
7,"► Gangbuk District‎ (3 C, 1 P)",37.56826,126.97783
8,"► Gangdong District‎ (3 C, 5 P)",37.47486,126.89106
9,"► Gangnam District‎ (3 C, 17 P)",37.56826,126.97783
10,"► Gangseo District, Seoul‎ (3 C, 2 P)",37.56826,126.97783
11,"► Geumcheon District‎ (3 C, 4 P)",37.53913,127.08366
12,"► Guro District, Seoul‎ (3 C, 8 P)",37.56826,126.97783


Save as CSV file

In [17]:
seoul.to_csv("seoul.csv", index=False)

## Create map of Seoul with districts highlighted

In [18]:
# get the coordinates of Seoul
address = 'Seoul, South Korea'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Seoul, South Korea is {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Seoul, South Korea is 37.5666791, 126.9782914.


In [19]:
# create map of Seoul using latitude and longitude values
seoul_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(seoul['Latitude'], seoul['Longitude'], seoul['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(seoul_map) 
    
seoul_map

In [20]:
seoul_map.save('seoul_map.html')

## Use Foursquare API to explore neighborhoods

In [21]:
# define Foursquare Credentials and Version
CLIENT_ID = 'PK0UBGJ5HC3CZ2THOXRIZ3LFOCCA0LBTJUUM1Z01L0BDGTWG' # your Foursquare ID
CLIENT_SECRET = 'EB2XAL4ICB4CU4MFU2UQDIYYC2C45RSMVTNQYEXWFSO5VRMS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PK0UBGJ5HC3CZ2THOXRIZ3LFOCCA0LBTJUUM1Z01L0BDGTWG
CLIENT_SECRET:EB2XAL4ICB4CU4MFU2UQDIYYC2C45RSMVTNQYEXWFSO5VRMS


Get top 50 venues within radius of 2000 meters.

In [22]:
radius = 2000
LIMIT = 50

venues = []

for lat, long, neighborhood in zip(seoul['Latitude'], seoul['Longitude'], seoul['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [23]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(1135, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,"► Dobong District‎ (3 C, 4 P)",37.61846,126.9278,PENCIL5 Real Seoul hostel (펜슬5),37.616964,126.924293,Hostel
1,"► Dobong District‎ (3 C, 4 P)",37.61846,126.9278,Starbucks (스타벅스),37.611764,126.917264,Coffee Shop
2,"► Dobong District‎ (3 C, 4 P)",37.61846,126.9278,알라딘 중고서점,37.617474,126.91996,Used Bookstore
3,"► Dobong District‎ (3 C, 4 P)",37.61846,126.9278,연신내문고,37.619952,126.92032,Bookstore
4,"► Dobong District‎ (3 C, 4 P)",37.61846,126.9278,연신내물빛공원,37.620447,126.917008,Park


In [24]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"► Dobong District‎ (3 C, 4 P)",50,50,50,50,50,50
"► Dongdaemun District‎ (3 C, 6 P)",50,50,50,50,50,50
"► Dongjak District‎ (3 C, 4 P)",50,50,50,50,50,50
"► Eunpyeong District‎ (2 C, 2 P)",50,50,50,50,50,50
"► Gangbuk District‎ (3 C, 1 P)",50,50,50,50,50,50
"► Gangdong District‎ (3 C, 5 P)",50,50,50,50,50,50
"► Gangnam District‎ (3 C, 17 P)",50,50,50,50,50,50
"► Gangseo District, Seoul‎ (3 C, 2 P)",50,50,50,50,50,50
"► Geumcheon District‎ (3 C, 4 P)",50,50,50,50,50,50
"► Guro District, Seoul‎ (3 C, 8 P)",50,50,50,50,50,50


Get number of unique categories

In [25]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 123 uniques categories.


In [26]:
venues_df['VenueCategory'].unique()[:50]

array(['Hostel', 'Coffee Shop', 'Used Bookstore', 'Bookstore', 'Park',
       'Café', 'Mountain', 'Chinese Restaurant', 'Korean Restaurant',
       'Trail', 'Supermarket', 'Bakery', 'Fast Food Restaurant',
       'Sushi Restaurant', 'Gym', 'Market', 'Ice Cream Shop',
       'Clothing Store', 'Bed & Breakfast', 'Department Store',
       'Steakhouse', 'Seafood Restaurant', 'Bunsik Restaurant',
       'Multiplex', 'Buffet', 'Electronics Store', 'Golf Course',
       'Auto Workshop', 'Dive Bar', 'BBQ Joint', 'Japanese Restaurant',
       'Outlet Store', 'Paper / Office Supplies Store', 'Deli / Bodega',
       'Hotel', 'Gym / Fitness Center', 'Flower Shop', 'Hotel Bar',
       'Pie Shop', 'Brazilian Restaurant', 'Shopping Mall',
       'Cosmetics Shop', 'Noodle House', 'Indoor Play Area',
       'Italian Restaurant', 'Massage Studio', 'Performing Arts Venue',
       'History Museum', 'Convenience Store', 'Pub'], dtype=object)

## Neighborhood Analysis

In [27]:
# one hot encoding
seoul_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
seoul_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [seoul_onehot.columns[-1]] + list(seoul_onehot.columns[:-1])
seoul_onehot = seoul_onehot[fixed_columns]

print(seoul_onehot.shape)
seoul_onehot.head()

(1135, 124)


Unnamed: 0,Neighborhoods,Aquarium,Art Gallery,Art Museum,Asian Restaurant,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Big Box Store,Bistro,Bookstore,Bossam/Jokbal Restaurant,Brazilian Restaurant,Bridge,Bubble Tea Shop,Buffet,Bunsik Restaurant,Burger Joint,Café,Campground,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Dive Bar,Dog Run,Donut Shop,Electronics Store,Ethiopian Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Flower Shop,Food,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,General Entertainment,German Restaurant,Golf Course,Grocery Store,Gukbap Restaurant,Gym,Gym / Fitness Center,Historic Site,History Museum,Hostel,Hot Spring,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indoor Play Area,Italian Restaurant,Janguh Restaurant,Japanese Restaurant,Korean Restaurant,Lounge,Market,Massage Studio,Metro Station,Mexican Restaurant,Mountain,Movie Theater,Multiplex,Museum,Noodle House,Other Great Outdoors,Outlet Store,Palace,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Pie Shop,Pizza Place,Plaza,Pub,Ramen Restaurant,Sake Bar,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Shopping Plaza,Snack Place,Spa,Sporting Goods Shop,Steakhouse,Supermarket,Surf Spot,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Theme Restaurant,Tourist Information Center,Toy / Game Store,Trail,Train Station,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Women's Store,Zoo
0,"► Dobong District‎ (3 C, 4 P)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"► Dobong District‎ (3 C, 4 P)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"► Dobong District‎ (3 C, 4 P)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
3,"► Dobong District‎ (3 C, 4 P)",0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"► Dobong District‎ (3 C, 4 P)",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [28]:
seoul_grouped = seoul_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(seoul_grouped.shape)
seoul_grouped

(23, 124)


Unnamed: 0,Neighborhoods,Aquarium,Art Gallery,Art Museum,Asian Restaurant,Auto Workshop,BBQ Joint,Bagel Shop,Bakery,Bar,Bed & Breakfast,Beer Bar,Big Box Store,Bistro,Bookstore,Bossam/Jokbal Restaurant,Brazilian Restaurant,Bridge,Bubble Tea Shop,Buffet,Bunsik Restaurant,Burger Joint,Café,Campground,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Dive Bar,Dog Run,Donut Shop,Electronics Store,Ethiopian Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Flower Shop,Food,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,General Entertainment,German Restaurant,Golf Course,Grocery Store,Gukbap Restaurant,Gym,Gym / Fitness Center,Historic Site,History Museum,Hostel,Hot Spring,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indoor Play Area,Italian Restaurant,Janguh Restaurant,Japanese Restaurant,Korean Restaurant,Lounge,Market,Massage Studio,Metro Station,Mexican Restaurant,Mountain,Movie Theater,Multiplex,Museum,Noodle House,Other Great Outdoors,Outlet Store,Palace,Paper / Office Supplies Store,Park,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Pie Shop,Pizza Place,Plaza,Pub,Ramen Restaurant,Sake Bar,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Shopping Plaza,Snack Place,Spa,Sporting Goods Shop,Steakhouse,Supermarket,Surf Spot,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Theme Restaurant,Tourist Information Center,Toy / Game Store,Trail,Train Station,Udon Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Women's Store,Zoo
0,"► Dobong District‎ (3 C, 4 P)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.18,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.02,0.0,0.0,0.02,0.14,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"► Dongdaemun District‎ (3 C, 6 P)",0.0,0.0,0.0,0.0,0.02,0.14,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.06,0.0,0.0,0.0,0.08,0.0,0.02,0.02,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.0,0.02,0.02,0.0,0.04,0.08,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.02,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"► Dongjak District‎ (3 C, 4 P)",0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.22,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.02,0.0,0.04,0.08,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"► Eunpyeong District‎ (2 C, 2 P)",0.02,0.0,0.0,0.0,0.0,0.08,0.0,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.02,0.06,0.0,0.04,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.06,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0
4,"► Gangbuk District‎ (3 C, 1 P)",0.0,0.0,0.04,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.04,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.06,0.0,0.0,0.0,0.08,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.1,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0
5,"► Gangdong District‎ (3 C, 5 P)",0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.04,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.08,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0
6,"► Gangnam District‎ (3 C, 17 P)",0.0,0.0,0.04,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.04,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.06,0.0,0.0,0.0,0.08,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.1,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0
7,"► Gangseo District, Seoul‎ (3 C, 2 P)",0.0,0.0,0.04,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.04,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.06,0.0,0.0,0.0,0.08,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.1,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0
8,"► Geumcheon District‎ (3 C, 4 P)",0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.08,0.0,0.0,0.0,0.18,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.1,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.02,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.02
9,"► Guro District, Seoul‎ (3 C, 8 P)",0.0,0.0,0.04,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.04,0.02,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.06,0.0,0.0,0.0,0.08,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.1,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0


In [29]:
len(seoul_grouped[seoul_grouped["Korean Restaurant"] > 0])

23

New dataframe for "Korean Restaurants"

In [30]:
seoul_res = seoul_grouped[["Neighborhoods","Korean Restaurant"]]
seoul_res.head()

Unnamed: 0,Neighborhoods,Korean Restaurant
0,"► Dobong District‎ (3 C, 4 P)",0.1
1,"► Dongdaemun District‎ (3 C, 6 P)",0.08
2,"► Dongjak District‎ (3 C, 4 P)",0.08
3,"► Eunpyeong District‎ (2 C, 2 P)",0.18
4,"► Gangbuk District‎ (3 C, 1 P)",0.1


## KNN Cluster

In [31]:
# set number of clusters
kclusters = 5

seoul_clustering = seoul_res.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(seoul_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 1, 4, 2, 1, 2, 2, 2, 2], dtype=int32)

In [32]:
seoul_merged = seoul_res.copy()

seoul_merged["Cluster Labels"] = kmeans.labels_

In [33]:
seoul_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
seoul_merged.head()

Unnamed: 0,Neighborhood,Korean Restaurant,Cluster Labels
0,"► Dobong District‎ (3 C, 4 P)",0.1,2
1,"► Dongdaemun District‎ (3 C, 6 P)",0.08,1
2,"► Dongjak District‎ (3 C, 4 P)",0.08,1
3,"► Eunpyeong District‎ (2 C, 2 P)",0.18,4
4,"► Gangbuk District‎ (3 C, 1 P)",0.1,2


In [34]:
# merge seoul_grouped with seoul_data to add latitude/longitude for each neighborhood
seoul_merged = seoul_merged.join(seoul.set_index("Neighborhood"), on="Neighborhood")

print(seoul_merged.shape)
seoul_merged.head() # check the last columns!

(23, 5)


Unnamed: 0,Neighborhood,Korean Restaurant,Cluster Labels,Latitude,Longitude
0,"► Dobong District‎ (3 C, 4 P)",0.1,2,37.61846,126.9278
1,"► Dongdaemun District‎ (3 C, 6 P)",0.08,1,37.50909,127.012269
2,"► Dongjak District‎ (3 C, 4 P)",0.08,1,37.55039,127.14546
3,"► Eunpyeong District‎ (2 C, 2 P)",0.18,4,37.4951,127.06278
4,"► Gangbuk District‎ (3 C, 1 P)",0.1,2,37.56826,126.97783


In [35]:
print(seoul_merged.shape)
seoul_merged.sort_values(["Cluster Labels"], inplace=True)
seoul_merged

(23, 5)


Unnamed: 0,Neighborhood,Korean Restaurant,Cluster Labels,Latitude,Longitude
19,"► Seongbuk District‎ (4 C, 7 P)",0.14,0,37.53532,126.84334
17,"► Seocho District‎ (3 C, 13 P)",0.12,0,37.54784,127.02461
14,"► Jungnang District‎ (3 C, 3 P)",0.12,0,37.49056,127.02
21,"► Songpa District‎ (4 C, 9 P)",0.14,0,37.53333,126.96667
11,"► Gwangjin District‎ (3 C, 8 P)",0.085714,1,37.60199,127.10461
1,"► Dongdaemun District‎ (3 C, 6 P)",0.08,1,37.50909,127.012269
2,"► Dongjak District‎ (3 C, 4 P)",0.08,1,37.55039,127.14546
5,"► Gangdong District‎ (3 C, 5 P)",0.08,1,37.47486,126.89106
20,"► Seongdong District‎ (3 C, 5 P)",0.1,2,37.56826,126.97783
18,"► Seodaemun District‎ (4 C, 18 P)",0.1,2,37.56826,126.97783


In [36]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(seoul_merged['Latitude'], seoul_merged['Longitude'], seoul_merged['Neighborhood'], seoul_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [37]:
map_clusters.save('map_clusters.html')

## Cluster Analysis

In [38]:
seoul_merged.loc[seoul_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Korean Restaurant,Cluster Labels,Latitude,Longitude
19,"► Seongbuk District‎ (4 C, 7 P)",0.14,0,37.53532,126.84334
17,"► Seocho District‎ (3 C, 13 P)",0.12,0,37.54784,127.02461
14,"► Jungnang District‎ (3 C, 3 P)",0.12,0,37.49056,127.02
21,"► Songpa District‎ (4 C, 9 P)",0.14,0,37.53333,126.96667


In [39]:
#Cluster 1
seoul_merged.loc[seoul_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Korean Restaurant,Cluster Labels,Latitude,Longitude
11,"► Gwangjin District‎ (3 C, 8 P)",0.085714,1,37.60199,127.10461
1,"► Dongdaemun District‎ (3 C, 6 P)",0.08,1,37.50909,127.012269
2,"► Dongjak District‎ (3 C, 4 P)",0.08,1,37.55039,127.14546
5,"► Gangdong District‎ (3 C, 5 P)",0.08,1,37.47486,126.89106


In [40]:
#Cluster 2
seoul_merged.loc[seoul_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Korean Restaurant,Cluster Labels,Latitude,Longitude
20,"► Seongdong District‎ (3 C, 5 P)",0.1,2,37.56826,126.97783
18,"► Seodaemun District‎ (4 C, 18 P)",0.1,2,37.56826,126.97783
15,"► Mapo District‎ (5 C, 8 P)",0.1,2,37.56826,126.97783
13,"► Jung District, Seoul‎ (3 C, 27 P)",0.1,2,37.56826,126.97783
0,"► Dobong District‎ (3 C, 4 P)",0.1,2,37.61846,126.9278
10,"► Gwanak District‎ (4 C, 8 P)",0.1,2,37.56826,126.97783
9,"► Guro District, Seoul‎ (3 C, 8 P)",0.1,2,37.56826,126.97783
8,"► Geumcheon District‎ (3 C, 4 P)",0.1,2,37.53913,127.08366
7,"► Gangseo District, Seoul‎ (3 C, 2 P)",0.1,2,37.56826,126.97783
6,"► Gangnam District‎ (3 C, 17 P)",0.1,2,37.56826,126.97783


In [41]:
#Cluster 3
seoul_merged.loc[seoul_merged['Cluster Labels'] == 3]

Unnamed: 0,Neighborhood,Korean Restaurant,Cluster Labels,Latitude,Longitude
16,"► Nowon District‎ (3 C, 6 P)",0.02,3,37.61505,127.02496


In [42]:
#Cluster 4
seoul_merged.loc[seoul_merged['Cluster Labels'] == 4]

Unnamed: 0,Neighborhood,Korean Restaurant,Cluster Labels,Latitude,Longitude
3,"► Eunpyeong District‎ (2 C, 2 P)",0.18,4,37.4951,127.06278
