# Yoga Studio Optimal Location Finder

In [1]:
import numpy as np
import csv
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json
import requests # library to handle requests
from pandas.io.json import json_normalize
import folium
print('Finished Importing Packages')

Finished Importing Packages


### Load Dataset

In [2]:
df = pd.read_csv(r"E:\Downloads E\chic_coords.csv")
df.head()

Unnamed: 0,Zip,City,State,Latitude,Longitude
0,46312,East Chicago,IN,41.64,-87.461
1,60064,North Chicago,IL,42.326,-87.852
2,60086,North Chicago,IL,42.433,-87.777
3,60185,West Chicago,IL,41.892,-88.205
4,60186,West Chicago,IL,41.84,-88.089


### Only Select Neighbourhoods from Chicago Proper

In [3]:
df = df[df.City == 'Chicago']
df.drop(['City', 'State'], axis = 1)
city_areas = df[['Zip', 'Latitude', 'Longitude']]
city_areas.head()

Unnamed: 0,Zip,Latitude,Longitude
8,60601,41.886,-87.623
9,60602,41.883,-87.629
10,60603,41.88,-87.63
11,60604,41.878,-87.628
12,60605,41.86,-87.619


### Visualize City Areas

In [4]:
city_map = folium.Map(location=[41.882, -87.645], zoom_start=10)
for lat, lng, zipcode in zip(city_areas['Latitude'], city_areas['Longitude'], city_areas['Zip']):
    label = '{}'.format(zipcode)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(city_map)
city_map

### Obtain Foursquare Location Data

In [5]:
CLIENT_ID = 'X55WKRFHFORIEZCWGAXX0ABI2RO5HXD0E5NQCBVAMQFRGIQD' # your Foursquare ID
CLIENT_SECRET = '1FHGN5CRA2X4SN2SG0K1G43QL33Z34L3VHZUQDGGNH1UMT5M' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

In [6]:
def getNearbyVenues(postcode, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for postcode, lat, lng in zip(postcode, latitudes, longitudes):
        
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            postcode, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Zip', 
                  'Latitude', 
                  'Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [7]:
city_venues = getNearbyVenues(postcode=city_areas['Zip'],
                                   latitudes=city_areas['Latitude'],
                                   longitudes=city_areas['Longitude']
                                  )
print(city_venues.shape)
city_venues.head()

(1902, 7)


Unnamed: 0,Zip,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,60601,41.886,-87.623,sweetgreen,41.884964,-87.624728,Salad Place
1,60601,41.886,-87.623,Chicago Architecture Center,41.88772,-87.62365,Tour Provider
2,60601,41.886,-87.623,Roti Modern Mediterranean,41.886048,-87.624948,Mediterranean Restaurant
3,60601,41.886,-87.623,Wildberry Pancakes & Cafe,41.884412,-87.623047,Breakfast Spot
4,60601,41.886,-87.623,St. Jane Chicago,41.886573,-87.624902,Hotel


In [8]:
city_venues.groupby('Zip').count().head()

Unnamed: 0_level_0,Latitude,Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Zip,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
60601,100,100,100,100,100,100
60602,100,100,100,100,100,100
60603,100,100,100,100,100,100
60604,100,100,100,100,100,100
60605,21,21,21,21,21,21


In [9]:
print('There are {} unique venue categories.'.format(len(city_venues['Venue Category'].unique())))

There are 250 unique venue categories.


In [10]:
# one hot encoding
city_onehot = pd.get_dummies(city_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
city_onehot['Zip'] = city_venues['Zip'] 

# move neighborhood column to the first column
fixed_columns = [city_onehot.columns[-1]] + list(city_onehot.columns[:-1])
city_onehot = city_onehot[fixed_columns]

In [11]:
city_grouped = city_onehot.groupby('Zip').mean().reset_index()

In [12]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [13]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Zip']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Zip'] = city_grouped['Zip']

for ind in np.arange(city_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(city_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Zip,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,60601,Hotel,Coffee Shop,Seafood Restaurant,Plaza,American Restaurant,Park,Steakhouse,Bakery,Theater,Bar
1,60602,Coffee Shop,Hotel,Theater,American Restaurant,Mediterranean Restaurant,Gastropub,Italian Restaurant,Snack Place,New American Restaurant,Bar
2,60603,Coffee Shop,Hotel,Middle Eastern Restaurant,Salad Place,American Restaurant,Vegetarian / Vegan Restaurant,Theater,Asian Restaurant,Dessert Shop,Museum
3,60604,Coffee Shop,Sandwich Place,Hotel,Italian Restaurant,Theater,Asian Restaurant,Café,Smoke Shop,Snack Place,Salad Place
4,60605,Football Stadium,Historic Site,Park,Sporting Goods Shop,Museum,Burger Joint,Bistro,English Restaurant,Sushi Restaurant,Bar


### Cluster neighbourhoods by common venues

In [14]:
# set number of clusters
kclusters = 5

city_grouped_clustering = city_grouped.drop('Zip', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(city_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 1, 1, 1, 3, 4, 1])

In [15]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

city_merged = city_areas


city_merged = city_merged.join(neighborhoods_venues_sorted.set_index('Zip'), on='Zip')

city_merged.head()

Unnamed: 0,Zip,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,60601,41.886,-87.623,1,Hotel,Coffee Shop,Seafood Restaurant,Plaza,American Restaurant,Park,Steakhouse,Bakery,Theater,Bar
9,60602,41.883,-87.629,1,Coffee Shop,Hotel,Theater,American Restaurant,Mediterranean Restaurant,Gastropub,Italian Restaurant,Snack Place,New American Restaurant,Bar
10,60603,41.88,-87.63,1,Coffee Shop,Hotel,Middle Eastern Restaurant,Salad Place,American Restaurant,Vegetarian / Vegan Restaurant,Theater,Asian Restaurant,Dessert Shop,Museum
11,60604,41.878,-87.628,1,Coffee Shop,Sandwich Place,Hotel,Italian Restaurant,Theater,Asian Restaurant,Café,Smoke Shop,Snack Place,Salad Place
12,60605,41.86,-87.619,1,Football Stadium,Historic Site,Park,Sporting Goods Shop,Museum,Burger Joint,Bistro,English Restaurant,Sushi Restaurant,Bar


In [16]:
latitude = 41.882
longitude = -87.645
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(city_merged['Latitude'], city_merged['Longitude'], city_merged['Zip'], city_merged['Cluster Labels']):
    if np.isnan(cluster):
        cluster = 0
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [17]:
city_merged.loc[city_merged['Cluster Labels'] == 0, city_merged.columns[[0] + list(range(4, city_merged.shape[1]))]].head()

Unnamed: 0,Zip,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
64,60663,Convenience Store,Ice Cream Shop,Pharmacy,Lounge,Mexican Restaurant,Bank,Italian Restaurant,Rental Car Location,Arts & Crafts Store,Thrift / Vintage Store
65,60664,Convenience Store,Ice Cream Shop,Pharmacy,Lounge,Mexican Restaurant,Bank,Italian Restaurant,Rental Car Location,Arts & Crafts Store,Thrift / Vintage Store
66,60665,Convenience Store,Ice Cream Shop,Pharmacy,Lounge,Mexican Restaurant,Bank,Italian Restaurant,Rental Car Location,Arts & Crafts Store,Thrift / Vintage Store
67,60667,Convenience Store,Ice Cream Shop,Pharmacy,Lounge,Mexican Restaurant,Bank,Italian Restaurant,Rental Car Location,Arts & Crafts Store,Thrift / Vintage Store
68,60668,Convenience Store,Ice Cream Shop,Pharmacy,Lounge,Mexican Restaurant,Bank,Italian Restaurant,Rental Car Location,Arts & Crafts Store,Thrift / Vintage Store


In [18]:
city_merged.loc[city_merged['Cluster Labels'] == 1, city_merged.columns[[0] + list(range(4, city_merged.shape[1]))]]

Unnamed: 0,Zip,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,60601,Hotel,Coffee Shop,Seafood Restaurant,Plaza,American Restaurant,Park,Steakhouse,Bakery,Theater,Bar
9,60602,Coffee Shop,Hotel,Theater,American Restaurant,Mediterranean Restaurant,Gastropub,Italian Restaurant,Snack Place,New American Restaurant,Bar
10,60603,Coffee Shop,Hotel,Middle Eastern Restaurant,Salad Place,American Restaurant,Vegetarian / Vegan Restaurant,Theater,Asian Restaurant,Dessert Shop,Museum
11,60604,Coffee Shop,Sandwich Place,Hotel,Italian Restaurant,Theater,Asian Restaurant,Café,Smoke Shop,Snack Place,Salad Place
12,60605,Football Stadium,Historic Site,Park,Sporting Goods Shop,Museum,Burger Joint,Bistro,English Restaurant,Sushi Restaurant,Bar
13,60606,Coffee Shop,Sandwich Place,New American Restaurant,Donut Shop,Mediterranean Restaurant,American Restaurant,Vegetarian / Vegan Restaurant,Falafel Restaurant,BBQ Joint,Hotel
14,60607,Greek Restaurant,Coffee Shop,Café,Pizza Place,Convenience Store,Sandwich Place,Sports Bar,Gym,Dance Studio,Food & Drink Shop
17,60610,Italian Restaurant,Sandwich Place,Gym / Fitness Center,Gym,Bar,Yoga Studio,Paper / Office Supplies Store,Coffee Shop,Deli / Bodega,Fast Food Restaurant
18,60611,Italian Restaurant,American Restaurant,Café,Steakhouse,Coffee Shop,Bar,Boutique,Shoe Store,Salon / Barbershop,Restaurant
19,60612,Gas Station,Park,Chinese Restaurant,Sandwich Place,Fast Food Restaurant,Hockey Arena,Pharmacy,Currency Exchange,Donut Shop,Sports Bar


In [19]:
city_merged.loc[city_merged['Cluster Labels'] == 2, city_merged.columns[[0] + list(range(4, city_merged.shape[1]))]]

Unnamed: 0,Zip,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
39,60633,Discount Store,Lounge,Greek Restaurant,Park,Yoga Studio,English Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Escape Room
47,60643,Park,Intersection,American Restaurant,Lounge,Yoga Studio,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room
53,60650,Intersection,Airport,Yoga Studio,Escape Room,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Exhibit
88,60693,Park,Dog Run,Cafeteria,Gym Pool,English Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Escape Room


In [20]:
city_merged.loc[city_merged['Cluster Labels'] == 3, city_merged.columns[[0] + list(range(4, city_merged.shape[1]))]]

Unnamed: 0,Zip,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,60608,Pizza Place,Mexican Restaurant,Convenience Store,Bar,Bakery,Massage Studio,Automotive Shop,Food & Drink Shop,Dessert Shop,Ice Cream Shop
23,60616,Chinese Restaurant,Park,Storage Facility,Bakery,New American Restaurant,Pizza Place,Seafood Restaurant,Mexican Restaurant,Vietnamese Restaurant,Flower Shop
24,60617,Flower Shop,Park,Mexican Restaurant,Bar,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Escape Room,Yoga Studio
26,60619,Donut Shop,Train Station,Pizza Place,BBQ Joint,Mexican Restaurant,Sandwich Place,Lounge,Caribbean Restaurant,Chinese Restaurant,Butcher
30,60623,Church,Gym / Fitness Center,Discount Store,Train Station,Park,Food Truck,Café,Pizza Place,Escape Room,Exhibit
32,60625,Park,Gym,Bank,Ice Cream Shop,Bakery,Mexican Restaurant,Soccer Field,Farmers Market,Falafel Restaurant,Exhibit
33,60626,Mexican Restaurant,American Restaurant,Pizza Place,Asian Restaurant,Bar,Bakery,Chinese Restaurant,Theater,Dive Bar,Farmers Market
35,60629,Pizza Place,Bakery,Mexican Restaurant,Liquor Store,Laundromat,Italian Restaurant,Dance Studio,Deli / Bodega,Filipino Restaurant,Creperie
44,60639,Mexican Restaurant,Donut Shop,Ice Cream Shop,Sandwich Place,Discount Store,Bus Station,Pizza Place,Exhibit,Escape Room,Dumpling Restaurant
46,60641,Convenience Store,Spa,Mexican Restaurant,Fast Food Restaurant,Chinese Restaurant,Art Gallery,English Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant


In [21]:
city_merged.loc[city_merged['Cluster Labels'] == 4, city_merged.columns[[0] + list(range(4, city_merged.shape[1]))]]

Unnamed: 0,Zip,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,60609,Shipping Store,Yoga Studio,Donut Shop,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Exhibit,Escape Room,English Restaurant


### Find Suburban Cluster

In [28]:
cols = ['1st Most Common Venue','2nd Most Common Venue','3rd Most Common Venue']
suburban_clusters = []
suburban_score = 0.25
for i in range(5):
    parks = 0
    temp = city_merged.loc[city_merged['Cluster Labels'] == i, ['1st Most Common Venue', '2nd Most Common Venue', '3rd Most Common Venue']]
    rows = len(temp.index)
    for col in cols:
         parks = parks + temp[temp == 'Park'][col].count()
    parks = parks/rows
    if parks>suburban_score:
        suburban_clusters.append(i)
suburban_clusters

[2, 3]

In [32]:
good_zips = []
cols = ['1st Most Common Venue','2nd Most Common Venue','3rd Most Common Venue', '4th Most Common Venue', '5th Most Common Venue','6th Most Common Venue','7th Most Common Venue','8th Most Common Venue']
for i in suburban_clusters:
    temp = city_merged.loc[city_merged['Cluster Labels'] == i, ['Zip','1st Most Common Venue','2nd Most Common Venue','3rd Most Common Venue', '4th Most Common Venue', '5th Most Common Venue','6th Most Common Venue','7th Most Common Venue','8th Most Common Venue']]
    for index, row in temp.iterrows():
        good = True
        for col in cols:
            if row[col] == 'Yoga Studio':
                good = False
                break
        if good == True:
            good_zips.append(row['Zip'])
print('Ideal zip codes for a new Yoga Studio include:')
for i in good_zips:
    print(i)
        
            
      

Ideal zip codes for a new Yoga Studio include:
60693
60608
60616
60617
60619
60623
60625
60626
60629
60639
60641
60647
60651
60655
60656
60659
