# Opening a New Restaurant in Arts District, Los Angeles


In [1]:
import numpy as np
import pandas as pd

import requests
from bs4 import BeautifulSoup 

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
#!pip install geocoder

from pandas.io.json import json_normalize

import json


from sklearn.cluster import KMeans

import matplotlib.cm as cm
import matplotlib.colors as colors



print('Libraries imported.')

Libraries imported.


## 1. Scrap data from Wikopedia page into a dataframe

In [2]:
LA_data = requests.get('https://en.wikipedia.org/wiki/Downtown_Los_Angeles#Districts').text

# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(LA_data, 'html.parser')

# create a list to store district data
neighborhoodList = []


# append the data into the list
for row in soup.find_all("div", class_="div-col")[0].findAll("li"):
    neighborhoodList.append(row.text)
# create a new DataFrame from the list
df = pd.DataFrame({"Neighborhood": neighborhoodList})

df.head(20)

Unnamed: 0,Neighborhood
0,Arts District[36]
1,Bunker Hill[34]
2,Civic Center[34] (built on the razed site of t...
3,Fashion District[34]
4,Financial District
5,Flower District
6,Gallery Row
7,Historic Core (contains the Broadway Theater D...
8,Industrial District[34]
9,Jewelry District[34]


In [3]:
df.shape

(15, 1)

## 2. Get the geographical coodinates

#### The geographic coordinate of Los Angeles

In [4]:
address = "Los Angeles, US"

geolocator = Nominatim(user_agent="us_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Los Angeles are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Los Angeles are 34.0536909, -118.242766.


#### The Geographic coordinates of LA districts

In [8]:
# define a function to get coordinates
#!pip install geocoder
#print('geocoder installed.')
import geocoder

def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Los Angeles , US'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [9]:
coords = [ get_latlng(neighborhood) for neighborhood in df["Neighborhood"].tolist() ]
coords

[[34.041964000000064, -118.23638699999998],
 [34.05202000000003, -118.25035999999994],
 [34.05361000000005, -118.24549999999999],
 [34.03731000000005, -118.25276999999994],
 [37.795780000000036, -122.40047999999996],
 [34.04021000000006, -118.24939999999998],
 [34.10080000000005, -118.42613999999998],
 [34.05361000000005, -118.24549999999999],
 [43.059242700101315, -83.30750289711257],
 [28.475250000000074, -16.440609999999936],
 [34.04939200000007, -118.24057899999997],
 [34.05361000000005, -118.24549999999999],
 [33.99186599254229, -118.47637999731879],
 [34.04802500000005, -118.25844699999999],
 [36.000230000000045, -78.90555999999998]]

In [10]:
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])


In [11]:
df['Latitude'] = df_coords['Latitude']
df['Longitude'] = df_coords['Longitude']

In [12]:
df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Arts District[36],34.041964,-118.236387
1,Bunker Hill[34],34.05202,-118.25036
2,Civic Center[34] (built on the razed site of t...,34.05361,-118.2455
3,Fashion District[34],34.03731,-118.25277
4,Financial District,37.79578,-122.40048
5,Flower District,34.04021,-118.2494
6,Gallery Row,34.1008,-118.42614
7,Historic Core (contains the Broadway Theater D...,34.05361,-118.2455
8,Industrial District[34],43.059243,-83.307503
9,Jewelry District[34],28.47525,-16.44061


In [13]:
df.shape

(15, 3)

## 3. Create a map of Los Angeles with neighborhoods

In [16]:
import folium
#!pip install folium
print('folium installed.')

map_LosAngeles=folium.Map(location=[latitude,longitude], zoom_start=14)

#Add markers to the map

for lat, lng, neighborhood in zip(df['Latitude'],
                                  df['Longitude'],
                                  df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_LosAngeles)  
    
map_LosAngeles


folium installed.


## 4. Use the Foursquare API to explore the neighborhoods

####  Define Foursquare Credentials and Version

In [17]:
CLIENT_ID = '2D2GDQ0LRNTW00UB5XNPZXQTBV3UPSZLEN43ARWSB1DGHFFU' 
CLIENT_SECRET = 'FKX2Y2XQX2C1PBVNBDMTMIEU5CUSSQCFOAQ1I4TI02OLSX5B' 
VERSION = '20180605' 

#### get the top 100 venues within a radius of 500 meters

In [18]:
radius = 500
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))
    

In [19]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(669, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Arts District[36],34.041964,-118.236387,The Container Yard,34.043176,-118.236745,Art Gallery
1,Arts District[36],34.041964,-118.236387,Urth Caffé,34.041916,-118.235218,Coffee Shop
2,Arts District[36],34.041964,-118.236387,Resident,34.042616,-118.23506,Beer Garden
3,Arts District[36],34.041964,-118.236387,Mr. Speedy Plumbing & Rooter Inc.,34.042538,-118.233864,Home Service
4,Arts District[36],34.041964,-118.236387,Bavel,34.041506,-118.232955,Mediterranean Restaurant


In [20]:
venues_df.groupby(["Neighborhood"]).count()


Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arts District[36],43,43,43,43,43,43
Bunker Hill[34],65,65,65,65,65,65
Civic Center[34] (built on the razed site of the Central Business District during the 1880s–1890s),33,33,33,33,33,33
Fashion District[34],41,41,41,41,41,41
Financial District,96,96,96,96,96,96
Flower District,31,31,31,31,31,31
Gallery Row,1,1,1,1,1,1
"Historic Core (contains the Broadway Theater District, Spring Street Financial District and Old Bank District[34])",33,33,33,33,33,33
Industrial District[34],5,5,5,5,5,5
Jewelry District[34],4,4,4,4,4,4


#### Let's find out how many unique categories can be curated from all the returned venues



In [21]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))


There are 180 uniques categories.


In [22]:
# print out the list of categories
venues_df['VenueCategory'].unique()[:20]

array(['Art Gallery', 'Coffee Shop', 'Beer Garden', 'Home Service',
       'Mediterranean Restaurant', 'Brewery', 'Climbing Gym',
       'Italian Restaurant', 'Pie Shop', 'German Restaurant',
       'Cocktail Bar', 'Grocery Store', 'Japanese Restaurant',
       'Seafood Restaurant', 'Ice Cream Shop', "Men's Store", 'Bookstore',
       'Café', 'Arcade', 'Smoothie Shop'], dtype=object)

#### Let's extract restaurant categories only


In [23]:
restaurant_df=venues_df[venues_df['VenueCategory'].str.contains('Restaurant')]
restaurant_df

Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
4,Arts District[36],34.041964,-118.236387,Bavel,34.041506,-118.232955,Mediterranean Restaurant
9,Arts District[36],34.041964,-118.236387,The Factory Kitchen,34.039080,-118.236118,Italian Restaurant
11,Arts District[36],34.041964,-118.236387,Wurstküche,34.045520,-118.236131,German Restaurant
15,Arts District[36],34.041964,-118.236387,Honda Ya,34.045226,-118.238523,Japanese Restaurant
16,Arts District[36],34.041964,-118.236387,Fisherman's Outlet,34.040906,-118.240112,Seafood Restaurant
...,...,...,...,...,...,...,...
646,Wholesale District or Warehouse District,36.000230,-78.905560,Dame's Chicken & Waffles,36.001694,-78.901276,Southern / Soul Food Restaurant
649,Wholesale District or Warehouse District,36.000230,-78.905560,Pop's Backdoor,35.999168,-78.909294,Italian Restaurant
655,Wholesale District or Warehouse District,36.000230,-78.905560,El Rodeo,35.999593,-78.909690,Mexican Restaurant
656,Wholesale District or Warehouse District,36.000230,-78.905560,Torero's Mexican Restaurant,35.999476,-78.908394,Mexican Restaurant


In [24]:
restaurant_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Arts District[36],11,11,11,11,11,11
Bunker Hill[34],16,16,16,16,16,16
Civic Center[34] (built on the razed site of the Central Business District during the 1880s–1890s),9,9,9,9,9,9
Fashion District[34],10,10,10,10,10,10
Financial District,35,35,35,35,35,35
Flower District,8,8,8,8,8,8
"Historic Core (contains the Broadway Theater District, Spring Street Financial District and Old Bank District[34])",9,9,9,9,9,9
Industrial District[34],1,1,1,1,1,1
Jewelry District[34],2,2,2,2,2,2
Little Tokyo[34],33,33,33,33,33,33


In [25]:
print('There are {} uniques restaurant categories.'.format(len(restaurant_df['VenueCategory'].unique())))


There are 37 uniques restaurant categories.


## 5. Analyze each neighborhood

In [26]:
# one hot encoding
kl_onehot = pd.get_dummies(restaurant_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kl_onehot['Neighborhoods'] = restaurant_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [kl_onehot.columns[-1]] + list(kl_onehot.columns[:-1])
kl_onehot = kl_onehot[fixed_columns]

print(kl_onehot.shape)
kl_onehot.head(20)

(192, 38)


Unnamed: 0,Neighborhoods,American Restaurant,Asian Restaurant,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Ethiopian Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,...,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoshoku Restaurant
4,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
11,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
16,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
23,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
26,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
33,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the total frequency of occurrence of each category

In [27]:
kl_grouped = kl_onehot.groupby(["Neighborhoods"]).sum().reset_index()

print(kl_grouped.shape)
kl_grouped

(14, 38)


Unnamed: 0,Neighborhoods,American Restaurant,Asian Restaurant,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Ethiopian Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,...,Southern / Soul Food Restaurant,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoshoku Restaurant
0,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
1,Bunker Hill[34],1,0,0,0,0,0,0,1,1,...,0,0,0,0,0,1,0,1,0,0
2,Civic Center[34] (built on the razed site of t...,1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
3,Fashion District[34],0,2,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,Financial District,3,0,2,1,1,0,0,0,0,...,0,1,1,2,0,0,0,0,2,0
5,Flower District,1,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
6,Historic Core (contains the Broadway Theater D...,1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
7,Industrial District[34],1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Jewelry District[34],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Little Tokyo[34],1,0,0,0,0,0,0,0,0,...,0,0,9,0,0,0,1,1,0,1


In [28]:
kl_grouped['Total Restaurant'] = kl_grouped.sum(axis=1)
#kl_grouped=kl_grouped['Neighborhoods','Total']
kl_grouped

Unnamed: 0,Neighborhoods,American Restaurant,Asian Restaurant,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Ethiopian Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,...,Spanish Restaurant,Sushi Restaurant,Szechuan Restaurant,Tapas Restaurant,Thai Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoshoku Restaurant,Total Restaurant
0,Arts District[36],0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,11
1,Bunker Hill[34],1,0,0,0,0,0,0,1,1,...,0,0,0,0,1,0,1,0,0,16
2,Civic Center[34] (built on the razed site of t...,1,0,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,9
3,Fashion District[34],0,2,0,1,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,10
4,Financial District,3,0,2,1,1,0,0,0,0,...,1,1,2,0,0,0,0,2,0,35
5,Flower District,1,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,8
6,Historic Core (contains the Broadway Theater D...,1,0,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,0,0,9
7,Industrial District[34],1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
8,Jewelry District[34],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
9,Little Tokyo[34],1,0,0,0,0,0,0,0,0,...,0,9,0,0,0,1,1,0,1,33


In [29]:
len(kl_grouped[kl_grouped["Total Restaurant"] > 0])

14

In [30]:
kl_restaurant = kl_grouped[["Neighborhoods","Total Restaurant"]]
kl_restaurant

Unnamed: 0,Neighborhoods,Total Restaurant
0,Arts District[36],11
1,Bunker Hill[34],16
2,Civic Center[34] (built on the razed site of t...,9
3,Fashion District[34],10
4,Financial District,35
5,Flower District,8
6,Historic Core (contains the Broadway Theater D...,9
7,Industrial District[34],1
8,Jewelry District[34],2
9,Little Tokyo[34],33


## 6. Cluster Neighborhoods

#### Run k-means to cluster the neighborhoods in damascus into 3 clusters.

In [31]:
# set number of clusters
kclusters = 3

kl_clustering = kl_restaurant.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 0, 1, 1, 2, 1, 1, 1, 1, 2], dtype=int32)

In [32]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
kl_merged = kl_restaurant.copy()

# add clustering labels
kl_merged["Cluster Labels"] = kmeans.labels_

In [33]:
kl_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
kl_merged.head()

Unnamed: 0,Neighborhood,Total Restaurant,Cluster Labels
0,Arts District[36],11,1
1,Bunker Hill[34],16,0
2,Civic Center[34] (built on the razed site of t...,9,1
3,Fashion District[34],10,1
4,Financial District,35,2


In [34]:
# merge kl_grouped with df to add latitude/longitude for each neighborhood
kl_merged = kl_merged.join(df.set_index("Neighborhood"), on="Neighborhood")

print(kl_merged.shape)
kl_merged.head() # check the last columns!

(14, 5)


Unnamed: 0,Neighborhood,Total Restaurant,Cluster Labels,Latitude,Longitude
0,Arts District[36],11,1,34.041964,-118.236387
1,Bunker Hill[34],16,0,34.05202,-118.25036
2,Civic Center[34] (built on the razed site of t...,9,1,34.05361,-118.2455
3,Fashion District[34],10,1,34.03731,-118.25277
4,Financial District,35,2,37.79578,-122.40048


In [35]:
# sort the results by Cluster Labels
print(kl_merged.shape)
kl_merged.sort_values(["Cluster Labels"], inplace=True)
kl_merged

(14, 5)


Unnamed: 0,Neighborhood,Total Restaurant,Cluster Labels,Latitude,Longitude
1,Bunker Hill[34],16,0,34.05202,-118.25036
11,South Park[34],16,0,33.991866,-118.47638
12,Toy District,21,0,34.048025,-118.258447
0,Arts District[36],11,1,34.041964,-118.236387
2,Civic Center[34] (built on the razed site of t...,9,1,34.05361,-118.2455
3,Fashion District[34],10,1,34.03731,-118.25277
5,Flower District,8,1,34.04021,-118.2494
6,Historic Core (contains the Broadway Theater D...,9,1,34.05361,-118.2455
7,Industrial District[34],1,1,43.059243,-83.307503
8,Jewelry District[34],2,1,28.47525,-16.44061


In [36]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=14)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kl_merged['Latitude'], kl_merged['Longitude'], kl_merged['Neighborhood'], kl_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [37]:
kl_merged.loc[kl_merged['Cluster Labels'] == 0]


Unnamed: 0,Neighborhood,Total Restaurant,Cluster Labels,Latitude,Longitude
1,Bunker Hill[34],16,0,34.05202,-118.25036
11,South Park[34],16,0,33.991866,-118.47638
12,Toy District,21,0,34.048025,-118.258447


In [38]:
kl_merged.loc[kl_merged['Cluster Labels'] == 1]


Unnamed: 0,Neighborhood,Total Restaurant,Cluster Labels,Latitude,Longitude
0,Arts District[36],11,1,34.041964,-118.236387
2,Civic Center[34] (built on the razed site of t...,9,1,34.05361,-118.2455
3,Fashion District[34],10,1,34.03731,-118.25277
5,Flower District,8,1,34.04021,-118.2494
6,Historic Core (contains the Broadway Theater D...,9,1,34.05361,-118.2455
7,Industrial District[34],1,1,43.059243,-83.307503
8,Jewelry District[34],2,1,28.47525,-16.44061
10,Skid Row[34],9,1,34.05361,-118.2455
13,Wholesale District or Warehouse District,12,1,36.00023,-78.90556


In [39]:
kl_merged.loc[kl_merged['Cluster Labels'] == 2]


Unnamed: 0,Neighborhood,Total Restaurant,Cluster Labels,Latitude,Longitude
4,Financial District,35,2,37.79578,-122.40048
9,Little Tokyo[34],33,2,34.049392,-118.240579
