In [1]:
!pip install --quiet --upgrade uszipcode
!pip install --quiet folium

In [2]:
from uszipcode import SearchEngine
import json
from pandas.io.json import json_normalize
import requests
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import folium
import folium.plugins as plugins

num_clusters = 3

In [3]:
#using Zipcode database, I am retrieving all the zipcodes and the associated demographics. This crucial demogrpahic data will be a major factor in
#recommending location for the restaurant. All this information is stored in a list then moved to a dataframe at the end.

search = SearchEngine(simple_zipcode=True)
zipcode = search.by_city_and_state("Charlotte", "NC", returns =50)

w, h = 9, 27;
zip_info = [[0 for x in range(w)] for y in range(h)] 
i =0

for zipinfo in zipcode:
    if zipinfo.bounds_south:
        append_list = [zipinfo.zipcode,zipinfo.lat,zipinfo.lng,zipinfo.population,zipinfo.population_density, zipinfo.median_home_value, zipinfo.median_household_income]
        zip_info[i] = append_list
        i = i+1
    else:
        pass

city_demographics_df = pd.DataFrame(zip_info, columns =['zipcode','latitude','longitude','population','population_density','median_home_value','median_household_income'])

Start downloading data for simple zipcode database, total size 9MB ...
  1 MB finished ...
  2 MB finished ...
  3 MB finished ...
  4 MB finished ...
  5 MB finished ...
  6 MB finished ...
  7 MB finished ...
  8 MB finished ...
  9 MB finished ...
  10 MB finished ...
  Complete!


In [4]:
city_demographics_df

Unnamed: 0,zipcode,latitude,longitude,population,population_density,median_home_value,median_household_income
0,28202,35.23,-80.84,11195,6213.0,251200.0,70300.0
1,28203,35.21,-80.86,11315,3411.0,367400.0,64604.0
2,28204,35.22,-80.83,4796,2774.0,304600.0,56286.0
3,28205,35.22,-80.79,43931,3716.0,160100.0,35310.0
4,28206,35.25,-80.82,11898,1686.0,86400.0,21087.0
5,28207,35.2,-80.82,9280,3686.0,743500.0,119063.0
6,28208,35.24,-80.91,34167,1553.0,86400.0,28435.0
7,28209,35.18,-80.85,20317,3705.0,268300.0,60180.0
8,28210,35.13,-80.85,42263,3327.0,242500.0,54915.0
9,28211,35.17,-80.79,28523,2647.0,366700.0,70403.0


In [5]:
#discard any zip codes with population less than 100, as they won't be in consideration for a restaurant location.
city_demographics_df.drop(city_demographics_df[city_demographics_df.population < 100].index, axis=0, inplace=True)

In [6]:
city_demographics_df.shape

(24, 7)

In [7]:
demo_cluster_df = city_demographics_df.drop(columns =['zipcode','latitude','longitude'], axis = 1)

#using standard scaler to standardize the data and produce clusters
cluster_dataset = StandardScaler().fit_transform(demo_cluster_df)

k_means = KMeans(init="k-means++", n_clusters=num_clusters, random_state = 0,n_init=12)
k_means.fit(cluster_dataset)
k_means.labels_

array([2, 2, 2, 0, 0, 1, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 2,
       0], dtype=int32)

In [8]:
#putting the zipcode back in clustering_df so that I can use it for future clustering with other variables added in
demo_cluster_df ['zipcode'] = city_demographics_df['zipcode']

In [9]:
#reordered the columns so that zipcode is the first column
cols = demo_cluster_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
demo_cluster_df.columns = cols

In [10]:
city_demographics_df['demo_cluster']=k_means.labels_
zip_clusters = city_demographics_df.drop(columns =['latitude','longitude'], axis = 1)

In [11]:
city_demographics_df.shape

(24, 8)

## Cluster Analysis for Demograpic data

In [12]:
zip_clusters.groupby('demo_cluster').mean().reset_index()

Unnamed: 0,demo_cluster,population,population_density,median_home_value,median_household_income
0,0,38179.642857,1950.642857,134028.571429,45467.357143
1,1,9280.0,3686.0,743500.0,119063.0
2,2,27431.555556,3291.777778,292644.444444,69044.111111


### Cluster #0  - Low Income Low Desnity - Lean would be No

In [13]:
zip_clusters.loc[zip_clusters.demo_cluster==0,zip_clusters.columns[range(0,len(zip_clusters.columns))]]

Unnamed: 0,zipcode,population,population_density,median_home_value,median_household_income,demo_cluster
3,28205,43931,3716.0,160100.0,35310.0,0
4,28206,11898,1686.0,86400.0,21087.0,0
6,28208,34167,1553.0,86400.0,28435.0,0
10,28212,38457,4162.0,114400.0,33781.0,0
11,28213,37309,2700.0,136700.0,42405.0,0
12,28214,34721,1060.0,126100.0,53527.0,0
13,28215,53629,1757.0,127500.0,45983.0,0
14,28216,47208,1579.0,123400.0,48491.0,0
15,28217,24204,1634.0,107500.0,38832.0,0
17,28227,49635,1283.0,149700.0,51527.0,0


### Cluster 1 - High Density High Income - Lean is Strong Yes

In [14]:
zip_clusters.loc[zip_clusters.demo_cluster==1,zip_clusters.columns[range(0,len(zip_clusters.columns))]]

Unnamed: 0,zipcode,population,population_density,median_home_value,median_household_income,demo_cluster
5,28207,9280,3686.0,743500.0,119063.0,1


### Cluster 2 -  High Population Medium Income - Yes

In [15]:
zip_clusters.loc[zip_clusters.demo_cluster==2,zip_clusters.columns[range(0,len(zip_clusters.columns))]]

Unnamed: 0,zipcode,population,population_density,median_home_value,median_household_income,demo_cluster
0,28202,11195,6213.0,251200.0,70300.0,2
1,28203,11315,3411.0,367400.0,64604.0,2
2,28204,4796,2774.0,304600.0,56286.0,2
7,28209,20317,3705.0,268300.0,60180.0,2
8,28210,42263,3327.0,242500.0,54915.0,2
9,28211,28523,2647.0,366700.0,70403.0,2
16,28226,37286,2500.0,281800.0,68291.0,2
21,28270,31525,2499.0,276600.0,84429.0,2
23,28277,59664,2550.0,274700.0,91989.0,2


In [16]:
#updating cluster labels based on our observations to make it easy to do visual analysis on the map.
zip_clusters.insert(loc=0,column='demo_lean',value='')

In [17]:
#updating cluster descr
zip_clusters_dict = {0: 'No',1:'Strong Yes',2:'Yes'}
zip_clusters['demo_lean']= zip_clusters['demo_cluster'].apply(lambda x:zip_clusters_dict[x])
city_demographics_df['demo_lean'] = zip_clusters['demo_lean']

In [18]:
zip_map_df = zip_clusters[['zipcode','demo_cluster','demo_lean']].copy()
zip_map_df.insert(loc=1,column ='latitude', value =0.0)
zip_map_df.insert(loc=2,column ='longitude', value =0.0)

In [19]:
zip_map_df.set_index('zipcode', inplace=True)
zip_map_df.update(city_demographics_df.set_index('zipcode'))

In [20]:
zip_map_df.reset_index(inplace=True)  # to recover the initial structure

In [21]:
city_demographics_df.columns

Index(['zipcode', 'latitude', 'longitude', 'population', 'population_density',
       'median_home_value', 'median_household_income', 'demo_cluster',
       'demo_lean'],
      dtype='object')

### A note on visualization. Since I have four dimensions to represent on the map, here is the approach I took.
#### Color Coding - Dark Green: Strong Yes, Green - Yes, Yellow - Maybe and Red - No
#### Size - Bigger the circle, higher the weight/importance. Please see the final map at the bottom to see how all four dimensions are represented

### Visauliazation of demopgrahic clusters

In [22]:
from geopy.geocoders import Nominatim 
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [23]:
address = 'Charlotte, NC'
geolocator = Nominatim(user_agent="CLT_explr")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

In [26]:
color_dict = {'Strong Yes':'darkgreen', 'Yes':'lime','Maybe':'yellow','No':'Red'}
map_clt = folium.Map(location=[latitude, longitude], zoom_start=11)

In [27]:
demo_clt = folium.Map(location=[latitude, longitude], zoom_start=11)

for zipcode,lat, lon,cluster,cluster_lean in zip(zip_map_df['zipcode'],zip_map_df['latitude'],zip_map_df['longitude'],zip_map_df['demo_cluster'],zip_map_df['demo_lean']):
    label = folium.Popup(str(zipcode) + ' Demographic lean:' + str(cluster_lean), parse_html=True)
    folium.CircleMarker([lat, lon], radius=20, popup=label, color=color_dict[cluster_lean], fill=False, fill_opacity=0).add_to(demo_clt)

demo_clt


In [28]:
#this function takes in API info for crime data and the type of incident, and returns a Dataframe with the crime info from the API. Mainly we are interested
#in the location of the crime. As this factors into our recommendation for the restaurant

def get_crime_data(url,type_crime):
    crime_results = requests.get(url).json()
    if crime_results:
        crime_df = json_normalize(crime_results['features'])
        crime_df.rename(index=str, columns={"attributes.ObjectID": "type","attributes.Longitude": "longitude","attributes.Latitude":"latitude","attributes.YR":"year" }, inplace=True)
        crime_df = crime_df[['latitude','longitude','year']]
        crime_df['type'] = type_crime
        return crime_df
    else:
        print ("problem with API call for", type_crime ," at", url)

In [29]:
# All the crime data is gathered from Charlotte Open Data portal here: http://data.charlottenc.gov/
# Three seperate APIs are used based on type of incidents. All data from get_crime_data is accumulated in a data frame

#crimes 1 CMPD Officer-Involved Shootings  - Individuals
url = 'https://services.arcgis.com/9Nl857LBlQVyzq54/arcgis/rest/services/Police/FeatureServer/1/query?where=1%3D1&outFields=*&outSR=4326&f=json'
all_crime_df = get_crime_data(url,'Individual')

# CMPD Officer Involved Shootings - Officers
url = 'https://services.arcgis.com/9Nl857LBlQVyzq54/arcgis/rest/services/Police/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json'
all_crime_df = pd.concat([all_crime_df,get_crime_data(url,'Officers')], axis=0)

# CMPD Officer-Involved Shootings - Incidents
url = 'https://services.arcgis.com/9Nl857LBlQVyzq54/arcgis/rest/services/Police/FeatureServer/2/query?where=1%3D1&outFields=*&outSR=4326&f=json'
all_crime_df = pd.concat([all_crime_df,get_crime_data(url,'Incidents')], axis=0)
all_crime_df.shape

(270, 4)

In [30]:
all_crime_df.head(5)

Unnamed: 0,latitude,longitude,year,type
0,35.224496,-80.896504,2005,Individual
1,35.226991,-80.729299,2005,Individual
2,35.199025,-80.756286,2005,Individual
3,35.219339,-80.876367,2005,Individual
4,35.144363,-80.861774,2005,Individual


In [31]:
all_crime_df.insert(loc=0,column='zipcode',value=0)

In [32]:
# This function will return zipcode based on latitude and longitude.
def get_zip(latitude,longitude):
    result = search.by_coordinates(latitude, longitude, radius=10, returns=1)
    try:
        return result[0].zipcode        
    except IndexError:
        return 0

In [33]:
#zipcode in crime dataframe is populated using get_zip function (see above)
all_crime_df['zipcode'] = all_crime_df[['latitude','longitude']].apply(lambda x: get_zip(*x), axis=1)

In [34]:
#delete all rows where no valid zipcode was returned
all_crime_df = all_crime_df[all_crime_df['zipcode']!=0]

In [35]:
#since crime data is at individual level
agg_crime_df = all_crime_df.groupby('zipcode', as_index=False).agg({"type": "count"})

In [36]:
# divided the number of crimes in a zipcode by total population.
agg_crime_df['type'] = agg_crime_df['type']/city_demographics_df['population']

In [37]:
#using standard scaler to standardize the data and produce clusters
cluster_dataset = StandardScaler().fit_transform(agg_crime_df.drop(columns =['zipcode'], axis = 1))

k_means = KMeans(init="k-means++", n_clusters=num_clusters, random_state = 0,n_init=12)
k_means.fit(cluster_dataset)
k_means.labels_

array([2, 2, 0, 0, 1, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [38]:
agg_crime_df['crime_cluster'] = k_means.labels_

### Crime Clusters Analysis

In [39]:
agg_crime_df.groupby('crime_cluster').mean().reset_index()

Unnamed: 0,crime_cluster,type
0,0,0.000286
1,1,0.003322
2,2,0.001592


### Cluster - 0 : Low Crime -  Yes

In [40]:
agg_crime_df.loc[agg_crime_df.crime_cluster==0,agg_crime_df.columns[range(0,len(agg_crime_df.columns))]]

Unnamed: 0,zipcode,type,crime_cluster
2,28204,0.000834,0
3,28205,0.000615,0
6,28209,0.000439,0
7,28210,0.000148,0
8,28211,7.1e-05,0
10,28213,0.000338,0
11,28215,0.000268,0
12,28216,0.000374,0
13,28217,0.000112,0
14,28269,0.000254,0


### Cluster 1 - High Crime - No

In [41]:
agg_crime_df.loc[agg_crime_df.crime_cluster==1,agg_crime_df.columns[range(0,len(agg_crime_df.columns))]]

Unnamed: 0,zipcode,type,crime_cluster
4,28206,0.00395,1
5,28208,0.002694,1


### Cluster 2 - Medium Crime - Maybe

In [42]:
agg_crime_df.loc[agg_crime_df.crime_cluster==2,agg_crime_df.columns[range(0,len(agg_crime_df.columns))]]

Unnamed: 0,zipcode,type,crime_cluster
0,28202,0.001429,2
1,28203,0.001944,2
9,28212,0.001402,2


In [43]:
#updating crime cluster lean
agg_crime_df_dict = {0: 'Yes',1:'No',2:'Maybe'}
agg_crime_df['crime_cluster_lean']= agg_crime_df['crime_cluster'].apply(lambda x:agg_crime_df_dict[x])
agg_crime_df

Unnamed: 0,zipcode,type,crime_cluster,crime_cluster_lean
0,28202,0.001429,2,Maybe
1,28203,0.001944,2,Maybe
2,28204,0.000834,0,Yes
3,28205,0.000615,0,Yes
4,28206,0.00395,1,No
5,28208,0.002694,1,No
6,28209,0.000439,0,Yes
7,28210,0.000148,0,Yes
8,28211,7.1e-05,0,Yes
9,28212,0.001402,2,Maybe


In [44]:
city_demographics_df.head(2)

Unnamed: 0,zipcode,latitude,longitude,population,population_density,median_home_value,median_household_income,demo_cluster,demo_lean
0,28202,35.23,-80.84,11195,6213.0,251200.0,70300.0,2,Yes
1,28203,35.21,-80.86,11315,3411.0,367400.0,64604.0,2,Yes


In [45]:
agg_crime_df['latitude'] = 0.0
agg_crime_df['longitude'] = 0.0

In [46]:
agg_crime_df.set_index('zipcode', inplace=True)
agg_crime_df.update(city_demographics_df.set_index('zipcode'))
agg_crime_df.reset_index(inplace = True)  # to recover the initial structure

In [49]:
agg_crime_df.rename(index=str, columns={"type": "crime_per1000"}, inplace=True)
agg_crime_df.reset_index(drop = True)

Unnamed: 0,zipcode,crime_per1000,crime_cluster,crime_cluster_lean,latitude,longitude
0,28202,0.001429,2,Maybe,35.23,-80.84
1,28203,0.001944,2,Maybe,35.21,-80.86
2,28204,0.000834,0,Yes,35.22,-80.83
3,28205,0.000615,0,Yes,35.22,-80.79
4,28206,0.00395,1,No,35.25,-80.82
5,28208,0.002694,1,No,35.24,-80.91
6,28209,0.000439,0,Yes,35.18,-80.85
7,28210,0.000148,0,Yes,35.13,-80.85
8,28211,7.1e-05,0,Yes,35.17,-80.79
9,28212,0.001402,2,Maybe,35.19,-80.75


## Crime Cluster Visualization

In [50]:
crime_clt = folium.Map(location=[latitude, longitude], zoom_start=11)

for zipcode,dummy,cluster,cluster_lean,lat,lon in zip(agg_crime_df['zipcode'],agg_crime_df['crime_per1000'],agg_crime_df['crime_cluster'],
                                                 agg_crime_df['crime_cluster_lean'],agg_crime_df['latitude'],agg_crime_df['longitude']):
    label = folium.Popup(str(zipcode) + ' Crime Cluster lean: ' + str(cluster_lean), parse_html=True)
    folium.CircleMarker([lat, lon],radius=5,popup=label,color=color_dict.get(cluster_lean, 'yellow'),fill=False,fill_opacity=0).add_to(crime_clt)

crime_clt

In [51]:
city_demographics_df = pd.merge(city_demographics_df, agg_crime_df[['zipcode','crime_per1000','crime_cluster','crime_cluster_lean']], how='left',on='zipcode')

In [52]:
city_demographics_df.head(2)

Unnamed: 0,zipcode,latitude,longitude,population,population_density,median_home_value,median_household_income,demo_cluster,demo_lean,crime_per1000,crime_cluster,crime_cluster_lean
0,28202,35.23,-80.84,11195,6213.0,251200.0,70300.0,2,Yes,0.001429,2.0,Maybe
1,28203,35.21,-80.86,11315,3411.0,367400.0,64604.0,2,Yes,0.001944,2.0,Maybe


In [53]:
agg_crime_df.head(2)

Unnamed: 0,zipcode,crime_per1000,crime_cluster,crime_cluster_lean,latitude,longitude
0,28202,0.001429,2,Maybe,35.23,-80.84
1,28203,0.001944,2,Maybe,35.21,-80.86


### Shopping Center Data Analysis

In [54]:
#Existing Shopping Centers  - http://data.charlottenc.gov/datasets/existing-shopping-centers/geoservice
url = 'https://maps.ci.charlotte.nc.us/arcgis/rest/services/ODP/PLN_MasterResource/MapServer/55/query?where=1%3D1&outFields=*&outSR=4326&f=json'

shopping_center_results = requests.get(url).json()
shopping_center_df = json_normalize(shopping_center_results['features'])
shopping_center_df.rename(index=str, columns={"attributes.CenterClass": "type","attributes.CenterSize": "size","geometry.rings":"polygon" }, inplace=True)
shopping_center_df = shopping_center_df[['type','size','polygon']]
shopping_center_df.head(2)

Unnamed: 0,type,size,polygon
0,Regional,745951.0,"[[[-80.74928773616489, 35.30679682239016], [-8..."
1,Neighborhood,81809.0,"[[[-80.73615768708153, 35.20179771919721], [-8..."


In [55]:
shopping_center_df.insert(loc =0, column = 'latitude', value = shopping_center_df.polygon.apply(lambda x:x[0][0][0]))

In [56]:
shopping_center_df.insert(loc =1, column = 'longitude', value = shopping_center_df.polygon.apply(lambda x:x[0][0][1]))

In [57]:
shopping_center_df.head(2)

Unnamed: 0,latitude,longitude,type,size,polygon
0,-80.749288,35.306797,Regional,745951.0,"[[[-80.74928773616489, 35.30679682239016], [-8..."
1,-80.736158,35.201798,Neighborhood,81809.0,"[[[-80.73615768708153, 35.20179771919721], [-8..."


In [58]:
shopping_center_df.drop(['polygon'],axis=1,inplace=True)

In [59]:
shopping_center_df.head(2)

Unnamed: 0,latitude,longitude,type,size
0,-80.749288,35.306797,Regional,745951.0
1,-80.736158,35.201798,Neighborhood,81809.0


In [60]:
def get_zip_code(latitude,longitude):
    result = search.by_coordinates( longitude,latitude,radius=20, returns=1)
    try:
        return result[0].zipcode
    except:
        print(latitude, longitude, " retruned no zipcode")
        return 0

In [61]:
shopping_center_df['zipcode'] = shopping_center_df[['latitude','longitude']].apply(lambda x: get_zip_code(*x), axis=1)

In [62]:
shopping_center_df = shopping_center_df[['type','size','zipcode']]

In [63]:
shopping_center_df.head(10)

Unnamed: 0,type,size,zipcode
0,Regional,745951.0,28262
1,Neighborhood,81809.0,28212
2,Regional,582651.0,28270
3,Neighborhood,113041.0,28226
4,Convenience,55761.0,28262
5,Community,181771.0,28217
6,Community,225534.0,28205
7,Community,238135.0,28105
8,Convenience,58105.0,28208
9,Neighborhood,106297.0,28208


In [64]:
shopping_center_df = shopping_center_df.groupby(['zipcode','type']).sum().transpose().stack(0).reset_index()
shopping_center_df  = shopping_center_df.fillna(0).reset_index()

In [65]:
shopping_center_df  = shopping_center_df.drop(['level_0'],axis=1, inplace = False)
shopping_center_df.head(5)

type,index,zipcode,Community,Convenience,Neighborhood,Regional,Super-Regional
0,0,28031,458404.0,531130.0,437427.0,1291376.0,0.0
1,1,28036,0.0,44017.0,0.0,0.0,0.0
2,2,28078,157961.0,57189.0,279598.0,0.0,0.0
3,3,28104,0.0,0.0,206489.0,0.0,0.0
4,4,28105,758467.0,41481.0,193602.0,1379701.0,0.0


In [66]:
shopping_center_df = shopping_center_df.drop(['index'],axis=1, inplace = False)

In [67]:
shopping_center_df.shape

(30, 6)

In [68]:
shopping_center_df = shopping_center_df[shopping_center_df.zipcode.isin(city_demographics_df.zipcode)]
shopping_center_df.shape

(24, 6)

In [69]:
shopping_center_onehot_df = shopping_center_df.drop(['zipcode'],axis=1, inplace = False)
shopping_center_onehot_df.shape

(24, 5)

In [70]:
#using standard scaler to standardize the data and produce clusters
cluster_dataset = StandardScaler().fit_transform(shopping_center_onehot_df)

k_means = KMeans(init="k-means++", n_clusters=num_clusters, random_state = 0,n_init=12)
k_means.fit(cluster_dataset)
k_means.labels_

array([0, 1, 0, 1, 0, 0, 1, 2, 2, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 2, 1, 1, 2,
       0], dtype=int32)

In [71]:
shopping_center_df['sc_cluster']=k_means.labels_

In [72]:
shopping_center_df.head(2)

type,zipcode,Community,Convenience,Neighborhood,Regional,Super-Regional,sc_cluster
6,28202,0.0,0.0,0.0,351954.0,0.0,0
7,28203,326520.0,212270.0,84557.0,0.0,0.0,1


In [73]:
shopping_center_df.shape

(24, 7)

### Shopping Center Cluster Analysis

In [74]:
shopping_center_df.groupby('sc_cluster').sum()

type,Community,Convenience,Neighborhood,Regional,Super-Regional
sc_cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,732369.0,720687.0,961719.0,2292697.0,0.0
1,3574551.0,2015678.0,2060457.0,4571237.0,890000.0
2,1838032.0,527384.0,1746960.0,3641737.0,4714003.0


### cluster 0 - Less Desirable

In [75]:
shopping_center_df.loc[shopping_center_df.sc_cluster==0,shopping_center_df.columns[range(0, len(shopping_center_df.columns))]]

type,zipcode,Community,Convenience,Neighborhood,Regional,Super-Regional,sc_cluster
6,28202,0.0,0.0,0.0,351954.0,0.0,0
8,28204,0.0,106919.0,0.0,407803.0,0.0,0
10,28206,254145.0,121469.0,76048.0,323793.0,0.0,0
11,28207,0.0,78827.0,77688.0,0.0,0.0,0
15,28211,130698.0,101437.0,74746.0,310546.0,0.0,0
18,28214,0.0,120276.0,164591.0,0.0,0.0,0
19,28215,347526.0,85023.0,0.0,302432.0,0.0,0
23,28227,0.0,51519.0,404527.0,0.0,0.0,0
29,28278,0.0,55217.0,164119.0,596169.0,0.0,0


### Cluster #1 - Highly Desirable

In [76]:
shopping_center_df.loc[shopping_center_df.sc_cluster==1,shopping_center_df.columns[range(0, len(shopping_center_df.columns))]]

type,zipcode,Community,Convenience,Neighborhood,Regional,Super-Regional,sc_cluster
7,28203,326520.0,212270.0,84557.0,0.0,0.0,1
9,28205,797066.0,113261.0,81999.0,0.0,0.0,1
12,28208,438404.0,141619.0,427303.0,0.0,0.0,1
16,28212,354116.0,259613.0,470553.0,0.0,0.0,1
17,28213,328972.0,239427.0,101552.0,686347.0,890000.0,1
20,28216,190939.0,209520.0,257795.0,311888.0,0.0,1
21,28217,431771.0,104603.0,96566.0,891140.0,0.0,1
22,28226,344296.0,130948.0,224566.0,329148.0,0.0,1
24,28262,362467.0,232827.0,72259.0,745951.0,0.0,1
26,28270,0.0,168280.0,80962.0,1206763.0,0.0,1


### Cluster 2 - Desirable

In [77]:
shopping_center_df.loc[shopping_center_df.sc_cluster==2,shopping_center_df.columns[range(0, len(shopping_center_df.columns))]]

type,zipcode,Community,Convenience,Neighborhood,Regional,Super-Regional,sc_cluster
13,28209,591111.0,114072.0,336615.0,0.0,1794000.0,2
14,28210,191970.0,50148.0,504062.0,1086661.0,849003.0,2
25,28269,181423.0,270764.0,420373.0,706327.0,2071000.0,2
28,28277,873528.0,92400.0,485910.0,1848749.0,0.0,2


## Visual Representation 

In [78]:
sc_clustering_map_df = shopping_center_df[['zipcode','sc_cluster']].copy()
sc_clustering_map_df.shape

(24, 2)

In [79]:
shopping_cluster_label_dict = {0: 'Yes',1:'Yes',2:'Maybe'}
sc_clustering_map_df['sc_lean']= sc_clustering_map_df['sc_cluster'].apply(lambda x:shopping_cluster_label_dict[x])
sc_clustering_map_df.shape

(24, 3)

In [80]:
sc_clustering_map_df.insert(loc =1, column ='latitude', value=0.0)
sc_clustering_map_df.insert(loc =2, column ='longitude', value=0.0)

In [81]:
city_demographics_df.head(2)

Unnamed: 0,zipcode,latitude,longitude,population,population_density,median_home_value,median_household_income,demo_cluster,demo_lean,crime_per1000,crime_cluster,crime_cluster_lean
0,28202,35.23,-80.84,11195,6213.0,251200.0,70300.0,2,Yes,0.001429,2.0,Maybe
1,28203,35.21,-80.86,11315,3411.0,367400.0,64604.0,2,Yes,0.001944,2.0,Maybe


In [82]:
sc_clustering_map_df.head(2)

type,zipcode,latitude,longitude,sc_cluster,sc_lean
6,28202,0.0,0.0,0,Yes
7,28203,0.0,0.0,1,Yes


In [83]:
sc_clustering_map_df.set_index('zipcode', inplace=True)
sc_clustering_map_df.update(city_demographics_df.set_index('zipcode'))
sc_clustering_map_df=sc_clustering_map_df.reset_index()  # to recover the initial structure"

In [84]:
city_demographics_df = pd.merge(city_demographics_df, sc_clustering_map_df[['zipcode','sc_cluster','sc_lean']], how='left',on='zipcode')

In [85]:
city_demographics_df.head(2)

Unnamed: 0,zipcode,latitude,longitude,population,population_density,median_home_value,median_household_income,demo_cluster,demo_lean,crime_per1000,crime_cluster,crime_cluster_lean,sc_cluster,sc_lean
0,28202,35.23,-80.84,11195,6213.0,251200.0,70300.0,2,Yes,0.001429,2.0,Maybe,0,Yes
1,28203,35.21,-80.86,11315,3411.0,367400.0,64604.0,2,Yes,0.001944,2.0,Maybe,1,Yes


In [86]:
sc_clustering_map_df

type,zipcode,latitude,longitude,sc_cluster,sc_lean
0,28202,35.23,-80.84,0,Yes
1,28203,35.21,-80.86,1,Yes
2,28204,35.22,-80.83,0,Yes
3,28205,35.22,-80.79,1,Yes
4,28206,35.25,-80.82,0,Yes
5,28207,35.2,-80.82,0,Yes
6,28208,35.24,-80.91,1,Yes
7,28209,35.18,-80.85,2,Maybe
8,28210,35.13,-80.85,2,Maybe
9,28211,35.17,-80.79,0,Yes


In [87]:
shopping_clt = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lon, zipcode, cluster, cluster_lean in zip(sc_clustering_map_df['latitude'], sc_clustering_map_df['longitude'],
                                                     sc_clustering_map_df['zipcode'], sc_clustering_map_df['sc_cluster'],
                                                    sc_clustering_map_df['sc_lean']):
    label = folium.Popup(str(zipcode) + ' Shopping Cluster: ' + str(cluster_lean), parse_html=True)
    folium.CircleMarker([lat, lon],radius=10,popup=label,color=color_dict[cluster_lean],fill=False,fill_opacity=0).add_to(shopping_clt)
shopping_clt


In [88]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list =row['venue.categories']
        
    if len(categories_list)==0:
        return None
    else:
        return categories_list[0]['name']

In [89]:
# The code was removed by Watson Studio for sharing.

In [90]:
def get_search_results (query,latitude,longitude,radius=2000,limit=500):
    url ='https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    latitude,
    longitude,
    VERSION,
    query,
    radius,
    limit)
    
    results = requests.get(url).json()
    try:
        venues = results['response']['venues']
        venues_detail = json_normalize(venues)
        venues_detail['categories']=venues_detail.apply(get_category_type, axis=1)
        columns = ['categories','location.postalCode','name']
        venues_detail = venues_detail[columns]
        venues_detail.columns = ['category','zipcode','name']
        return venues_detail
    except:
        emptyframe = pd.DataFrame(columns=['category','zipcode','name'])
        return emptyframe

In [91]:
restaurants_by_zip = pd.DataFrame(columns=['category','zipcode','name'])

In [92]:
for zipcode in city_demographics_df['zipcode']:
    result = search.by_zipcode(zipcode)
    if result:
        temp = get_search_results ('restaurant',result.lat,result.lng)
        if temp.empty:
            pass
        else:
            restaurants_by_zip = restaurants_by_zip.append(temp)

In [93]:
restaurants_by_zip.shape

(297, 3)

In [94]:
restaurants_by_zip = restaurants_by_zip[restaurants_by_zip.zipcode.isin(city_demographics_df.zipcode)]
restaurants_by_zip.shape

(278, 3)

In [95]:
# mapping 43 or so different kinds of restaurants down to a few so that I can make sense of the clusters.
category_dict={'Food':'Unknown','American Restaurant':'American','Italian Restaurant':'European','Mexican Restaurant':'Latin American','Fast Food Restaurant':'Unknown',
               'Restaurant':'Unknown','Greek Restaurant':'European','Chinese Restaurant':'Asian','New American Restaurant':'American','Southern / Soul Food Restaurant':'American',
               'Japanese Restaurant':'Asian','Thai Restaurant':'Asian','Pub':'Drinking establishment','French Restaurant':'European','Ethiopian Restaurant':'African','Latin American Restaurant':'Latin American',
               'Asian Restaurant':'Asian','Caribbean Restaurant':'African','Office':'Unknown','Hotel':'Unknown','Diner':'Unknown','Spanish Restaurant':'Latin American','Seafood Restaurant':'Latin American','Miscellaneous Shop':'Unknown',
               'Bar':'Drinking establishment','Sushi Restaurant':'Asian','Breakfast Spot':'Unknown','Food Service':'Unknown','Cuban Restaurant':'Latin American','Indian Restaurant':'Asian',
               'Middle Eastern Restaurant':'Asian','Brewery':'Drinking establishment','Steakhouse':'Latin American','Argentinian Restaurant':'Latin American','Salad Place':'Unknown',
               'Sports Bar':'American','Karaoke Bar':'Asian','Bowling Alley':'Unknown','General College & University':'Unknown','Theme Restaurant':'Unknown','Vietnamese Restaurant':'Asian',
               'Kitchen Supply Store':'Unknown','Peruvian Restaurant':'Latin American','Beer Garden':'American','Colombian Restaurant':'Latin American','None':'Unknown'}

In [96]:
restaurants_by_zip['category']= restaurants_by_zip['category'].apply(lambda x:category_dict.get(x, x))
restaurants_by_zip.shape

(278, 3)

In [97]:
restaurant_agg_df = restaurants_by_zip.groupby(['zipcode','category']).count().transpose().stack(0)


In [98]:
restaurant_agg_df.reset_index(inplace=True)

In [99]:
restaurant_agg_df.drop(['level_0'],axis=1, inplace= True)
restaurant_agg_df  = restaurant_agg_df.fillna(0).reset_index()
restaurant_agg_df

category,index,zipcode,African,American,Asian,Drinking establishment,European,Latin American,Unknown
0,0,28202,0.0,35.0,3.0,7.0,4.0,5.0,24.0
1,1,28203,0.0,0.0,4.0,1.0,6.0,0.0,7.0
2,2,28204,3.0,7.0,3.0,0.0,7.0,0.0,12.0
3,3,28205,3.0,6.0,6.0,0.0,1.0,11.0,13.0
4,4,28206,0.0,3.0,0.0,0.0,0.0,1.0,6.0
5,5,28207,0.0,4.0,2.0,0.0,4.0,0.0,1.0
6,6,28208,0.0,1.0,2.0,0.0,0.0,0.0,8.0
7,7,28209,0.0,1.0,2.0,2.0,1.0,0.0,2.0
8,8,28210,0.0,0.0,0.0,0.0,1.0,1.0,2.0
9,9,28211,0.0,1.0,3.0,0.0,0.0,1.0,2.0


In [100]:
restaurant_agg_df.drop(['index'],axis=1, inplace= True)
restaurant_agg_df.shape

(21, 8)

In [101]:
restaurant_onehot_df = restaurant_agg_df.copy()

In [102]:
restaurant_onehot_df.insert(loc=1,column='population', value = city_demographics_df['population'])

In [103]:
restaurant_onehot_df.head(2)

category,zipcode,population,African,American,Asian,Drinking establishment,European,Latin American,Unknown
0,28202,11195,0.0,35.0,3.0,7.0,4.0,5.0,24.0
1,28203,11315,0.0,0.0,4.0,1.0,6.0,0.0,7.0


In [104]:
# dividing each column by population from that zipcode so that clustering is done on standardized data across zipcodes
restaurant_onehot_df[['African','American','Asian','Drinking establishment','European','Latin American','Unknown']].div(restaurant_onehot_df.population, axis=0)

category,African,American,Asian,Drinking establishment,European,Latin American,Unknown
0,0.0,0.003126,0.000268,0.000625,0.000357,0.000447,0.002144
1,0.0,0.0,0.000354,8.8e-05,0.00053,0.0,0.000619
2,0.000626,0.00146,0.000626,0.0,0.00146,0.0,0.002502
3,6.8e-05,0.000137,0.000137,0.0,2.3e-05,0.00025,0.000296
4,0.0,0.000252,0.0,0.0,0.0,8.4e-05,0.000504
5,0.0,0.000431,0.000216,0.0,0.000431,0.0,0.000108
6,0.0,2.9e-05,5.9e-05,0.0,0.0,0.0,0.000234
7,0.0,4.9e-05,9.8e-05,9.8e-05,4.9e-05,0.0,9.8e-05
8,0.0,0.0,0.0,0.0,2.4e-05,2.4e-05,4.7e-05
9,0.0,3.5e-05,0.000105,0.0,0.0,3.5e-05,7e-05


In [105]:
restaurant_onehot_df = restaurant_agg_df.drop(['zipcode'],axis=1, inplace = False)
restaurant_onehot_df.shape

(21, 7)

In [106]:
#using standard scaler to standardize the data and produce clusters
cluster_dataset = StandardScaler().fit_transform(restaurant_onehot_df)

k_means = KMeans(init="k-means++", n_clusters=num_clusters, random_state = 0,n_init=12)
k_means.fit(cluster_dataset)
k_means.labels_

array([1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [107]:
restaurant_agg_df.insert (loc = 1, column ='r_cluster',value = k_means.labels_)

In [108]:
restaurant_agg_df.head(2)

category,zipcode,r_cluster,African,American,Asian,Drinking establishment,European,Latin American,Unknown
0,28202,1,0.0,35.0,3.0,7.0,4.0,5.0,24.0
1,28203,2,0.0,0.0,4.0,1.0,6.0,0.0,7.0


### Restaurant Cluster Analysis

In [109]:
restaurant_agg_df.groupby('r_cluster').mean().reset_index()

category,r_cluster,African,American,Asian,Drinking establishment,European,Latin American,Unknown
0,0,0.176471,1.0,1.058824,0.117647,0.647059,0.823529,2.470588
1,1,0.0,35.0,3.0,7.0,4.0,5.0,24.0
2,2,2.0,4.333333,4.333333,0.333333,4.666667,3.666667,10.666667


### Cluster #0 : Fewer Restaurants - Yes

In [110]:
restaurant_agg_df.loc[restaurant_agg_df.r_cluster==0,restaurant_agg_df.columns[range(0,len(restaurant_agg_df.columns))]]

category,zipcode,r_cluster,African,American,Asian,Drinking establishment,European,Latin American,Unknown
4,28206,0,0.0,3.0,0.0,0.0,0.0,1.0,6.0
5,28207,0,0.0,4.0,2.0,0.0,4.0,0.0,1.0
6,28208,0,0.0,1.0,2.0,0.0,0.0,0.0,8.0
7,28209,0,0.0,1.0,2.0,2.0,1.0,0.0,2.0
8,28210,0,0.0,0.0,0.0,0.0,1.0,1.0,2.0
9,28211,0,0.0,1.0,3.0,0.0,0.0,1.0,2.0
10,28212,0,2.0,1.0,0.0,0.0,0.0,5.0,10.0
11,28213,0,0.0,1.0,1.0,0.0,0.0,2.0,1.0
12,28215,0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
13,28216,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


### Cluster 1 - Super competetive - No

In [111]:
restaurant_agg_df.loc[restaurant_agg_df.r_cluster==1,restaurant_agg_df.columns[range(0,len(restaurant_agg_df.columns))]]

category,zipcode,r_cluster,African,American,Asian,Drinking establishment,European,Latin American,Unknown
0,28202,1,0.0,35.0,3.0,7.0,4.0,5.0,24.0


### Cluster 2 - High # of Asian Restaurants - Maybe

In [112]:
restaurant_agg_df.loc[restaurant_agg_df.r_cluster==2,restaurant_agg_df.columns[range(0,len(restaurant_agg_df.columns))]]

category,zipcode,r_cluster,African,American,Asian,Drinking establishment,European,Latin American,Unknown
1,28203,2,0.0,0.0,4.0,1.0,6.0,0.0,7.0
2,28204,2,3.0,7.0,3.0,0.0,7.0,0.0,12.0
3,28205,2,3.0,6.0,6.0,0.0,1.0,11.0,13.0


   ### Visaul representation

In [113]:
restaurant_map_df = restaurant_agg_df[['zipcode','r_cluster']].copy()
restaurant_map_df.shape

(21, 2)

In [114]:
restaurant_cluster_label_dict = {0: 'Yes',1:'No',2:'Maybe'}
restaurant_map_df['r_lean']= restaurant_map_df['r_cluster'].apply(lambda x:restaurant_cluster_label_dict[x])
restaurant_map_df.shape

(21, 3)

In [115]:
restaurant_map_df.insert(loc =1, column ='latitude', value=0.0)
restaurant_map_df.insert(loc =2, column ='longitude', value=0.0)

In [116]:
restaurant_map_df.set_index('zipcode', inplace=True)
restaurant_map_df.update(city_demographics_df.set_index('zipcode'))
restaurant_map_df_df=restaurant_map_df.reset_index()  # to recover the initial structure"

In [117]:
restaurant_map_df=restaurant_map_df.reset_index()

In [118]:
city_demographics_df = pd.merge(city_demographics_df, restaurant_map_df[['zipcode','r_cluster','r_lean']], how='left',on='zipcode')

In [119]:
competition_clt = folium.Map(location=[latitude, longitude], zoom_start=11)
for lat, lon, zipcode, cluster, cluster_lean in zip(restaurant_map_df['latitude'], restaurant_map_df['longitude'],
                                                     restaurant_map_df['zipcode'], restaurant_map_df['r_cluster'],
                                                    restaurant_map_df['r_lean']):
    label = folium.Popup(str(zipcode) + ' Restaurant Cluster: ' + str(cluster_lean), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=15,
        popup=label,
        color=color_dict[cluster_lean],
        fill=False,
        fill_opacity=0).add_to(competition_clt)
competition_clt

In [120]:
city_demographics_df.head(5)

Unnamed: 0,zipcode,latitude,longitude,population,population_density,median_home_value,median_household_income,demo_cluster,demo_lean,crime_per1000,crime_cluster,crime_cluster_lean,sc_cluster,sc_lean,r_cluster,r_lean
0,28202,35.23,-80.84,11195,6213.0,251200.0,70300.0,2,Yes,0.001429,2.0,Maybe,0,Yes,1.0,No
1,28203,35.21,-80.86,11315,3411.0,367400.0,64604.0,2,Yes,0.001944,2.0,Maybe,1,Yes,2.0,Maybe
2,28204,35.22,-80.83,4796,2774.0,304600.0,56286.0,2,Yes,0.000834,0.0,Yes,0,Yes,2.0,Maybe
3,28205,35.22,-80.79,43931,3716.0,160100.0,35310.0,0,No,0.000615,0.0,Yes,1,Yes,2.0,Maybe
4,28206,35.25,-80.82,11898,1686.0,86400.0,21087.0,0,No,0.00395,1.0,No,0,Yes,0.0,Yes


## Presenting all the data together in a layered map -  28207 is the winner, 28211 and 28226 are close second.

In [121]:
layered_clt = folium.Map(location=[latitude, longitude], zoom_start=11)

fg = folium.FeatureGroup(name='All')
layered_clt.add_child(fg)
g1 = plugins.FeatureGroupSubGroup(fg, 'Demographics Lean ')
layered_clt.add_child(g1)

g2 = plugins.FeatureGroupSubGroup(fg, 'Crime lean')
layered_clt.add_child(g2)

g3 = plugins.FeatureGroupSubGroup(fg, 'Shopping Center Lean')
layered_clt.add_child(g3)

g4 = plugins.FeatureGroupSubGroup(fg, 'Competition Lean')
layered_clt.add_child(g4)

for zipcode,lat, lon,cluster,cluster_lean in zip(zip_map_df['zipcode'],zip_map_df['latitude'],zip_map_df['longitude'],zip_map_df['demo_cluster'],zip_map_df['demo_lean']):
    label = folium.Popup(str(zipcode) + ' Demographic lean:' + str(cluster_lean), parse_html=True)
    folium.CircleMarker([lat, lon], radius=20, popup=label, color=color_dict[cluster_lean], fill=False, fill_opacity=0).add_to(g1)
    

for zipcode,dummy,cluster,cluster_lean,lat,lon in zip(agg_crime_df['zipcode'],agg_crime_df['crime_per1000'],agg_crime_df['crime_cluster'],
                                                 agg_crime_df['crime_cluster_lean'],agg_crime_df['latitude'],agg_crime_df['longitude']):
    label = folium.Popup(str(zipcode) + ' Crime Cluster lean: ' + str(cluster_lean), parse_html=True)
    folium.CircleMarker([lat, lon],radius=5,popup=label,color=color_dict.get(cluster_lean, 'yellow'),fill=False,fill_opacity=0).add_to(g2)



for lat, lon, zipcode, cluster, cluster_lean in zip(sc_clustering_map_df['latitude'], sc_clustering_map_df['longitude'],
                                                     sc_clustering_map_df['zipcode'], sc_clustering_map_df['sc_cluster'],
                                                    sc_clustering_map_df['sc_lean']):
    label = folium.Popup(str(zipcode) + ' Shopping Cluster: ' + str(cluster_lean), parse_html=True)
    folium.CircleMarker( [lat, lon], radius=10, popup=label, color=color_dict[cluster_lean], fill=False, fill_opacity=0).add_to(g3)    
    
for lat, lon, zipcode, cluster, cluster_lean in zip(restaurant_map_df['latitude'], restaurant_map_df['longitude'],
                                                     restaurant_map_df['zipcode'], restaurant_map_df['r_cluster'],
                                                    restaurant_map_df['r_lean']):
    label = folium.Popup(str(zipcode) + ' Restaurant Cluster: ' + str(cluster_lean), parse_html=True)
    folium.CircleMarker([lat, lon],radius=15,popup=label,color=color_dict[cluster_lean],fill=False,fill_opacity=0).add_to(g4)  
    
folium.LayerControl(collapsed=False).add_to(layered_clt)
layered_clt

In [122]:
city_demographics_df.head(10)

Unnamed: 0,zipcode,latitude,longitude,population,population_density,median_home_value,median_household_income,demo_cluster,demo_lean,crime_per1000,crime_cluster,crime_cluster_lean,sc_cluster,sc_lean,r_cluster,r_lean
0,28202,35.23,-80.84,11195,6213.0,251200.0,70300.0,2,Yes,0.001429,2.0,Maybe,0,Yes,1.0,No
1,28203,35.21,-80.86,11315,3411.0,367400.0,64604.0,2,Yes,0.001944,2.0,Maybe,1,Yes,2.0,Maybe
2,28204,35.22,-80.83,4796,2774.0,304600.0,56286.0,2,Yes,0.000834,0.0,Yes,0,Yes,2.0,Maybe
3,28205,35.22,-80.79,43931,3716.0,160100.0,35310.0,0,No,0.000615,0.0,Yes,1,Yes,2.0,Maybe
4,28206,35.25,-80.82,11898,1686.0,86400.0,21087.0,0,No,0.00395,1.0,No,0,Yes,0.0,Yes
5,28207,35.2,-80.82,9280,3686.0,743500.0,119063.0,1,Strong Yes,,,,0,Yes,0.0,Yes
6,28208,35.24,-80.91,34167,1553.0,86400.0,28435.0,0,No,0.002694,1.0,No,1,Yes,0.0,Yes
7,28209,35.18,-80.85,20317,3705.0,268300.0,60180.0,2,Yes,0.000439,0.0,Yes,2,Maybe,0.0,Yes
8,28210,35.13,-80.85,42263,3327.0,242500.0,54915.0,2,Yes,0.000148,0.0,Yes,2,Maybe,0.0,Yes
9,28211,35.17,-80.79,28523,2647.0,366700.0,70403.0,2,Yes,7.1e-05,0.0,Yes,0,Yes,0.0,Yes


In [123]:
city_demographics_df[['demo_lean','crime_cluster_lean','sc_lean','r_lean']]

Unnamed: 0,demo_lean,crime_cluster_lean,sc_lean,r_lean
0,Yes,Maybe,Yes,No
1,Yes,Maybe,Yes,Maybe
2,Yes,Yes,Yes,Maybe
3,No,Yes,Yes,Maybe
4,No,No,Yes,Yes
5,Strong Yes,,Yes,Yes
6,No,No,Yes,Yes
7,Yes,Yes,Maybe,Yes
8,Yes,Yes,Maybe,Yes
9,Yes,Yes,Yes,Yes


In [124]:
city_demographics_df.rename(index=str, columns={'demo_lean':'demographic lean','crime_cluster_lean':'safety lean','sc_lean':'economic activity lean','r_lean':'competition lean'}, inplace =True)

In [125]:
final_tally_df = city_demographics_df[['zipcode','demographic lean','safety lean','economic activity lean','competition lean']].copy()

## Final Tally - 28207 is the winner, 28211 and 28226 are close second.

In [126]:
final_tally_df.fillna('None')

Unnamed: 0,zipcode,demographic lean,safety lean,economic activity lean,competition lean
0,28202,Yes,Maybe,Yes,No
1,28203,Yes,Maybe,Yes,Maybe
2,28204,Yes,Yes,Yes,Maybe
3,28205,No,Yes,Yes,Maybe
4,28206,No,No,Yes,Yes
5,28207,Strong Yes,,Yes,Yes
6,28208,No,No,Yes,Yes
7,28209,Yes,Yes,Maybe,Yes
8,28210,Yes,Yes,Maybe,Yes
9,28211,Yes,Yes,Yes,Yes
