## Data Science Capstone
#### Mapping and Comparison of two cities to assist in relocation within the U.S

In [1]:
import pandas as pd
import numpy as np
import re 
import itertools
from bs4 import BeautifulSoup
from lxml import html
import requests

from geopy.geocoders import Nominatim

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium

from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

Solving environment: done

# All requested packages already installed.



#### Function to Scrape City-Data.com

In [None]:
# page = requests.get('http://www.city-data.com/nbmaps/neigh-Richmond-Virginia.html', verify=False).text
# soupobj=BeautifulSoup(page,'lxml')
# neighborhood_divs=soupobj.findAll("div",{"class": "neighborhood"})

# # RVA median income print(div.findAll(text=True)[22].strip())
# # RVA household price

# for div in neighborhood_divs:
#     #print(div)
#     span=div.find("span", {"class": "street-name"})

#     #neighborhoods.append(span.text)
#     print(div.findAll(text=True)[22].strip())
# #     med_income.append(div.findAll(text=True)[20].strip()) 
# #     link=div.findAll('a', href=re.compile('^/neighborhood/'))
# # #print(link) find_all('a')[2]["href"]
# #     links.append(div.find_all('a')[1]["href"])

In [2]:
def citypage_scraper(url):

	page = requests.get(url, verify=False).text
	soupobj=BeautifulSoup(page,'lxml')
	
	neighborhoods=[]
	pop_dens=[]
	med_income=[]
	city_medinc=[]
	city_medrent=[]
	med_rent=[]
	links=[]
	zipcodes=[]
	neighborhood_divs=soupobj.findAll("div",{"class": "neighborhood"})

	for div in neighborhood_divs:
		span=div.find("span", {"class": "street-name"})
		neighborhoods.append(span.text)
		pop_dens.append(div.findAll(text=True)[13].strip())
		med_income.append(div.findAll(text=True)[20].strip()) 
		city_medinc.append(div.findAll(text=True)[22].strip())
		city_medrent.append(div.findAll(text=True)[27].strip())
		med_rent.append(div.findAll(text=True)[25].strip())
		link=div.findAll('a', href=re.compile('^/neighborhood/'))
		#print(link) find_all('a')[2]["href"]
		links.append(div.find_all('a')[1]["href"])
		
	for link in links:
		fulllink='http://www.city-data.com/'+link
		det_page=BeautifulSoup(requests.get(fulllink).text,'lxml')
		header=det_page.select('h1.city')[0].text.strip()
		zipcodes.append(re.findall(r'\d{5}', header))


#Cleaning the Neighborhood list
	for i, s in enumerate(pop_dens):
		pop_dens[i] = s.replace(',','')
        
	for i, s in enumerate(med_income):
		med_income[i] = s.replace(',', '').replace('$', '')
        
	for i, s in enumerate(med_rent):
		med_rent[i] = s.replace(',', '').replace('$', '')

#Cleaning the city lists        
	for i, s in enumerate(city_medinc):
		city_medinc[i] = s.replace(',', '').replace('$', '')
        
	for i, s in enumerate(city_medrent):
		city_medrent[i] = s.replace(',', '').replace('$', '')

	return zipcodes,neighborhoods,pop_dens,med_income,city_medinc ,med_rent, city_medrent

In [3]:
#For example, choosing two cities with similar populations
url1='http://www.city-data.com/nbmaps/neigh-Richmond-Virginia.html'
url2='http://www.city-data.com/nbmaps/neigh-Stockton-California.html'
zipcodes1,neighborhoods1,pop_dens1,med_income1,city_medinc1,med_rent1, city_medrent1=citypage_scraper(url1)
zipcodes2,neighborhoods2,pop_dens2,med_income2,city_medinc2,med_rent2, city_medrent2=citypage_scraper(url2)

#### Transform City Data into Pandas Dataframe

In [4]:
tuples1=list(zip(zipcodes1, neighborhoods1,pop_dens1,med_income1,city_medinc1, med_rent1,city_medrent1))
tuples2=list(zip(zipcodes2, neighborhoods2,pop_dens2,med_income2,city_medinc2, med_rent2,city_medrent2))
df1=pd.DataFrame(tuples1,columns=['ZipCodes','Neighborhood','PopulationDensity','MedianIncome','CityMedIncome','MedianRent','CityMedRent'])
df2=pd.DataFrame(tuples2,columns=['ZipCodes','Neighborhood','PopulationDensity','MedianIncome','CityMedIncome','MedianRent','CityMedRent'])

def melt_series(s):
    lengths = s.str.len().values
    flat = [i for i in itertools.chain.from_iterable(s.values.tolist())]
    idx = np.repeat(s.index.values, lengths)
    return pd.Series(flat, idx, name=s.name)


dfcity1=melt_series(df1.ZipCodes).to_frame().join(df1.drop('ZipCodes', 1)).reindex_axis(df1.columns, 1)
dfcity2=melt_series(df2.ZipCodes).to_frame().join(df2.drop('ZipCodes', 1)).reindex_axis(df2.columns, 1)



#### Add Latitude and Longitude to City Data Frames

In [5]:
geolocator = Nominatim(user_agent="LocExplorer")

def locfinder(df):
    lats=[]
    longs=[]
    for zipcode in df.ZipCodes:
    #print(zipcode)
        loc=geolocator.geocode(zipcode)
        lats.append(loc.latitude)
        longs.append(loc.longitude)
    return lats,longs

lat1,long1=locfinder(dfcity1)
lat2,long2=locfinder(dfcity2)

In [6]:
def object_converter(df,lat,long):
    df['Latitude'] = lat
    df['Longitude']=long
    df['PopulationDensity']=df['PopulationDensity'].astype(str).astype(int)
    df['MedianIncome']=df['MedianIncome'].astype(str).astype(int)
    df['MedianRent']=df['MedianRent'].astype(str).astype(int)
    df['CityMedIncome']=df['CityMedIncome'].astype(str).astype(int)
    df['CityMedRent']=df['CityMedRent'].astype(str).astype(int)
    return df

#### A little quality control, change Population Density and Median Numbers into Categorical Variables

In [7]:
dfcity1=object_converter(dfcity1,lat1,long1)
dfcity2=object_converter(dfcity2,lat2,long2)

In [8]:
dfcity1.head()

Unnamed: 0,ZipCodes,Neighborhood,PopulationDensity,MedianIncome,CityMedIncome,MedianRent,CityMedRent,Latitude,Longitude
0,23222,Barton Heights,3869,35508,42373,743,783,37.574507,-77.425628
1,23222,Barton Heights Cemeteries,4174,27500,42373,773,783,37.574507,-77.425628
2,23222,Battery Park,4856,44386,42373,673,783,37.574507,-77.425628
3,23220,Belle Isle,1830,34103,42373,628,783,37.553784,-77.445697
4,23227,Bellevue,3166,45852,42373,964,783,37.60341,-77.446582


In [9]:
# Do the math to change medianincome and median rent to percentages against the city median. Convert values to categorical 'High','Medium', 'Low'
def categorical_maker(df):

    df['MedianIncome'] = df['MedianIncome']/df['CityMedIncome']*100
    df['MedianRent']=df['MedianRent']/df['CityMedRent']*100


    df.loc[df.PopulationDensity <= 4000, 'PopDens'] = 'LowDens' 
    df.loc[(df.PopulationDensity > 4000) & (df.PopulationDensity <= 8000), 'PopDens'] = 'MediumDens' 
    df.loc[df.PopulationDensity > 10000 , 'PopDens'] = 'HighDens' 

    df.loc[df.MedianIncome <= 75, 'MedInc'] = 'LowInc' 
    df.loc[(df.MedianIncome > 75) & (df.MedianIncome <= 100), 'MedInc'] = 'MediumInc' 
    df.loc[df.MedianIncome > 100 , 'MedInc'] = 'HighInc' 

    df.loc[df.MedianRent <= 75, 'MedRent'] = 'LowRent' 
    df.loc[(df.MedianRent > 75) & (df.MedianRent <= 100), 'MedRent'] = 'MediumRent' 
    df.loc[df.MedianRent > 100 , 'MedRent'] = 'HighRent'

    df=df.drop(['PopulationDensity', 'MedianIncome','MedianRent','CityMedIncome','CityMedRent'], axis=1)
    
    return df


In [10]:
dfcity1=categorical_maker(dfcity1)
dfcity2=categorical_maker(dfcity2)
dfcity1.head()

Unnamed: 0,ZipCodes,Neighborhood,Latitude,Longitude,PopDens,MedInc,MedRent
0,23222,Barton Heights,37.574507,-77.425628,LowDens,MediumInc,MediumRent
1,23222,Barton Heights Cemeteries,37.574507,-77.425628,MediumDens,LowInc,MediumRent
2,23222,Battery Park,37.574507,-77.425628,MediumDens,HighInc,MediumRent
3,23220,Belle Isle,37.553784,-77.445697,LowDens,MediumInc,MediumRent
4,23227,Bellevue,37.60341,-77.446582,LowDens,HighInc,HighRent


#### Map the Two Cities

In [11]:
address='Richmond, VA'
geolocator = Nominatim(user_agent="City_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
map_city1 = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, neighborhood in zip(dfcity1['Latitude'], dfcity1['Longitude'], dfcity1['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_city1)  
    
map_city1

In [12]:
address2='Stockton, CA'
geolocator = Nominatim(user_agent="City_explorer")
location2 = geolocator.geocode(address2)
latitude2 = location2.latitude
longitude2 = location2.longitude
map_city2 = folium.Map(location=[latitude2, longitude2], zoom_start=12)

# add markers to map
for lat, lng, neighborhood in zip(dfcity2['Latitude'], dfcity2['Longitude'], dfcity2['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_city2)  
    
map_city2

#### Explore venues using FourSquare API

In [13]:
CLIENT_ID = 'NHALHKECTHEG32XKAQMZVYSIGPHDAXB2DFYZJRUHPDEXDSE1' # your Foursquare ID
CLIENT_SECRET = 'BFYHXQKZK5UM3NM1M1VVF4HLKPGB4QG3WCGZBMCAMXIKB1S4' # your Foursquare Secret
VERSION = '20190530' # Foursquare API version
limit=100
radius=500

In [14]:
def getNearbyVenues(names, latitudes, longitudes, inc,pop,rent, radius=500):
    
    venues_list=[]
    for name, lat, lng, inc, pop, rent in zip(names, latitudes, longitudes, inc, pop,rent):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            inc,
            pop,  
            rent,
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
    #print(venues_list)
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])

    #print(nearby_venues)
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Neighborhood Median Income',
                  'Neighborhood Population Density',
                  'Neighborhood Median Rent',
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    #print(nearby_venues)
    return(nearby_venues)

In [15]:
city1_venues = getNearbyVenues(names=dfcity1['Neighborhood'],
                                   latitudes=dfcity1['Latitude'],
                                   longitudes=dfcity1['Longitude'],
                                   inc=dfcity1['MedInc'],
                                   pop=dfcity1['PopDens'],
                                   rent=dfcity1['MedRent'])
                                   
                            

In [16]:
city2_venues = getNearbyVenues(names=dfcity2['Neighborhood'],
                                   latitudes=dfcity2['Latitude'],
                                   longitudes=dfcity2['Longitude'],
                                   inc=dfcity2['MedInc'],
                                   pop=dfcity2['PopDens'],
                                   rent=dfcity2['MedRent']
                                   
                                  )

In [17]:
city1_venues.groupby('Neighborhood').count()
city2_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Neighborhood Median Income,Neighborhood Population Density,Neighborhood Median Rent,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Airport,4,4,4,4,4,4,4,4,4
Akers,24,24,24,24,24,24,24,24,24
Anderson,24,24,24,0,24,24,24,24,24
August,2,2,2,2,2,2,2,2,2
Bear Creek District,13,13,13,13,13,13,13,13,13
Brookside,1,1,1,1,1,1,1,1,1
Civic District,34,34,34,34,34,34,34,34,34
Colonial Heights,17,17,17,17,17,17,17,17,17
Country Club,2,2,2,2,2,2,2,2,2
Creekside/Wagner,3,3,3,3,3,3,3,3,3


#### One Hot Encoding for Neighborhoods

In [18]:
#Function to one hot encode Neighborhood Median Income	Neighborhood Population Density	Neighborhood Median Rent	Venue	Venue Latitude	Venue Longitude	Venue Category
def onehot(df):
    df_onehot=pd.get_dummies(data=df, columns=['Neighborhood Median Income','Neighborhood Population Density','Neighborhood Median Rent', 'Venue Category'], prefix="", prefix_sep="")

#     #df_onehot=pd.get_dummies(df[['Venue Category']], prefix="", prefix_sep="")
#     df_onehot['Neighborhood']=df['Neighborhood']
    df_onehot=df_onehot.drop(['Neighborhood Latitude', 'Neighborhood Longitude','Venue','Venue Latitude','Venue Longitude'], axis=1)
    
    
#     fixed_columns = [df_onehot.columns[-1]] + list(df_onehot.columns[:-1])
#     df_onehot = df_onehot[fixed_columns]

    return df_onehot

In [19]:
city1onehot=onehot(city1_venues)
city2onehot=onehot(city2_venues)
city1onehot

Unnamed: 0,Neighborhood,HighInc,LowInc,MediumInc,HighDens,LowDens,MediumDens,HighRent,LowRent,MediumRent,...,Scenic Lookout,Seafood Restaurant,Southern / Soul Food Restaurant,Spa,Speakeasy,Sporting Goods Shop,Thrift / Vintage Store,Toy / Game Store,Volleyball Court,Wine Shop
0,Belle Isle,0,0,1,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,Belle Isle,0,0,1,0,1,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
2,Belle Isle,0,0,1,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,Belle Isle,0,0,1,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,Belle Isle,0,0,1,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
5,Belle Isle,0,0,1,0,1,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
6,Belle Isle,0,0,1,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
7,Belle Isle,0,0,1,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
8,Bellevue,1,0,0,0,1,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
9,Bellevue,1,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
#Group cities

city1_grouped = city1onehot.groupby('Neighborhood').mean().reset_index()
city2_grouped = city2onehot.groupby('Neighborhood').mean().reset_index()

city2_grouped

Unnamed: 0,Neighborhood,HighInc,LowInc,MediumInc,HighDens,LowDens,MediumDens,HighRent,LowRent,MediumRent,...,Rental Car Location,Sandwich Place,Seafood Restaurant,Smoke Shop,Supermarket,Taco Place,Tailor Shop,Theater,Turkish Restaurant,Wine Shop
0,Airport,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0
1,Akers,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.041667,0.041667
2,Anderson,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.041667,0.041667
3,August,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0
4,Bear Creek District,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,0.0
5,Brookside,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Civic District,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.058824,0.029412,0.0,0.029412,0.0,0.029412,0.029412,0.029412,0.0,0.0
7,Colonial Heights,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
8,Country Club,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Creekside/Wagner,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


In [57]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top attributes
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Attribute'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Attribute'.format(ind+1))

# create a new dataframe
city1_venues_sorteddf = pd.DataFrame(columns=columns)
city1_venues_sorteddf['Neighborhood'] = city1_grouped['Neighborhood']

for ind in np.arange(city1_grouped.shape[0]):
    city1_venues_sorteddf.iloc[ind, 1:] = return_most_common_venues(city1_grouped.iloc[ind, :], num_top_venues)

In [None]:
city1_venues_sorted.head()

In [58]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top attributes
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Attribute'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Attribute'.format(ind+1))

# create a new dataframe
city2_venues_sorteddf = pd.DataFrame(columns=columns)
city2_venues_sorteddf['Neighborhood'] = city2_grouped['Neighborhood']

for ind in np.arange(city2_grouped.shape[0]):
    city2_venues_sorteddf.iloc[ind, 1:] = return_most_common_venues(city2_grouped.iloc[ind, :], num_top_venues)

In [None]:
city2_venues_sorteddf.head()

#### Clustering Neighborhoods

In [59]:
kclusters = 8

city1_grouped_clustering = city1_grouped.drop('Neighborhood', 1)

city1_grouped_clustering
# # run k-means clustering
kmeans1 = KMeans(n_clusters=kclusters, random_state=0).fit(city1_grouped_clustering)
#kmeans.labels_[0:5] 
kmeans1.labels_

array([5, 2, 1, 6, 5, 0, 1, 1, 6, 5, 4, 0, 7, 0, 5, 1, 6, 1, 2, 3, 5, 7, 2,
       1, 2, 0, 4, 7, 4, 0, 0, 6, 6, 3, 5, 4, 0, 3, 1, 4, 3, 1, 0, 1, 1, 0,
       6, 3], dtype=int32)

In [60]:
kclusters = 8

city2_grouped_clustering = city2_grouped.drop('Neighborhood', 1)

city2_grouped_clustering
# # run k-means clustering
kmeans2 = KMeans(n_clusters=kclusters, random_state=0).fit(city2_grouped_clustering)
#kmeans.labels_[0:5] 
kmeans2.labels_

array([7, 3, 2, 5, 3, 1, 2, 3, 3, 3, 0, 7, 2, 1, 1, 5, 3, 0, 6, 5, 2, 6, 2,
       1, 3, 3, 4, 0, 4, 6, 0, 2, 5, 3, 3, 3, 2, 4, 2, 3, 3, 0, 7, 7, 6, 1,
       1, 3, 2, 7, 2, 0, 3, 5, 3, 2, 3, 3, 5], dtype=int32)

In [61]:
# add clustering labels
city1_venues_sorteddf.insert(0, 'Cluster Labels', kmeans1.labels_)

# # neighborhoods_venues_sorted
city1_merged = dfcity1

# # # # merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
city1_merged = city1_merged.join(city1_venues_sorteddf.set_index('Neighborhood'), how='right',on='Neighborhood')

city1_merged.head() # check the last columns!

Unnamed: 0,ZipCodes,Neighborhood,Latitude,Longitude,PopDens,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
3,23220,Belle Isle,37.553784,-77.445697,LowDens,MediumInc,MediumRent,5,MediumInc,LowDens,MediumRent,Speakeasy,Donut Shop,Nightclub,Nightlife Spot,River,Historic Site,Coffee Shop
4,23227,Bellevue,37.60341,-77.446582,LowDens,HighInc,HighRent,2,HighInc,LowDens,HighRent,Discount Store,Art Gallery,Casino,Chinese Restaurant,Fried Chicken Joint,Southern / Soul Food Restaurant,Thrift / Vintage Store
5,23224,Blackwell,37.519435,-77.419623,LowDens,MediumInc,HighRent,1,MediumInc,LowDens,HighRent,Park,Caribbean Restaurant,Grocery Store,Volleyball Court,Seafood Restaurant,Harbor / Marina,Casino
6,23220,Boulevard,37.553784,-77.445697,,HighInc,HighRent,6,HighInc,HighRent,Speakeasy,Nightclub,Nightlife Spot,Historic Site,Coffee Shop,River,Garden,Donut Shop
6,23221,Boulevard,37.562376,-77.483962,,HighInc,HighRent,6,HighInc,HighRent,Speakeasy,Nightclub,Nightlife Spot,Historic Site,Coffee Shop,River,Garden,Donut Shop


In [62]:
city2_venues_sorteddf.insert(0, 'Cluster Labels', kmeans2.labels_)

# # neighborhoods_venues_sorted
city2_merged = dfcity2

# # # # merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
city2_merged = city2_merged.join(city2_venues_sorteddf.set_index('Neighborhood'), how='right',on='Neighborhood')

city2_merged.head() # check the last columns!

Unnamed: 0,ZipCodes,Neighborhood,Latitude,Longitude,PopDens,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
0,95205,Airport,37.974233,-121.267864,LowDens,MediumInc,HighRent,7,MediumInc,LowDens,HighRent,Gas Station,Grocery Store,Taco Place,Wine Shop,Bus Station,Business Service,Café
0,95206,Airport,37.930699,-121.27557,LowDens,MediumInc,HighRent,7,MediumInc,LowDens,HighRent,Gas Station,Grocery Store,Taco Place,Wine Shop,Bus Station,Business Service,Café
0,95215,Airport,37.958887,-121.237743,LowDens,MediumInc,HighRent,7,MediumInc,LowDens,HighRent,Gas Station,Grocery Store,Taco Place,Wine Shop,Bus Station,Business Service,Café
1,95207,Akers,37.988492,-121.332081,MediumDens,HighInc,MediumRent,3,HighInc,MediumDens,MediumRent,Pizza Place,Japanese Restaurant,Gym / Fitness Center,French Restaurant,Financial or Legal Service,Turkish Restaurant,Fast Food Restaurant
1,95210,Akers,48.968175,2.284591,MediumDens,HighInc,MediumRent,3,HighInc,MediumDens,MediumRent,Pizza Place,Japanese Restaurant,Gym / Fitness Center,French Restaurant,Financial or Legal Service,Turkish Restaurant,Fast Food Restaurant


In [63]:
map_clusters1 = folium.Map(location=[37.540726,-77.436050], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(city1_merged['Latitude'], city1_merged['Longitude'], city1_merged['Neighborhood'], city1_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters1)
       
map_clusters1

In [64]:
map_clusters2 = folium.Map(location=[37.957703,-121.290779], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(city2_merged['Latitude'], city2_merged['Longitude'], city2_merged['Neighborhood'], city2_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters2)
       
map_clusters2

In [65]:
city1_merged.loc[city1_merged['Cluster Labels'] == 0, city1_merged.columns[[1] + list(range(5, city1_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
8,Broad Street Commercial,LowInc,HighRent,0,LowInc,HighRent,Coffee Shop,Speakeasy,Café,River,College Cafeteria,College Gym,Nightclub,Music Venue
8,Broad Street Commercial,LowInc,HighRent,0,LowInc,HighRent,Coffee Shop,Speakeasy,Café,River,College Cafeteria,College Gym,Nightclub,Music Venue
16,Commonwealth Club,LowInc,HighRent,0,LowInc,HighDens,HighRent,Speakeasy,Donut Shop,Nightclub,Nightlife Spot,River,Historic Site,Coffee Shop
18,Downtown,LowInc,HighRent,0,LowInc,MediumDens,HighRent,Coffee Shop,Speakeasy,Café,River,College Cafeteria,College Gym,Music Venue
18,Downtown,LowInc,HighRent,0,LowInc,MediumDens,HighRent,Coffee Shop,Speakeasy,Café,River,College Cafeteria,College Gym,Music Venue
30,Jackson Ward,LowInc,HighRent,0,LowInc,HighRent,Coffee Shop,Speakeasy,Café,River,College Cafeteria,College Gym,Nightclub,Music Venue
30,Jackson Ward,LowInc,HighRent,0,LowInc,HighRent,Coffee Shop,Speakeasy,Café,River,College Cafeteria,College Gym,Nightclub,Music Venue
34,Monroe Park,LowInc,HighRent,0,LowInc,HighDens,HighRent,Speakeasy,Donut Shop,Nightclub,Nightlife Spot,River,Historic Site,Coffee Shop
35,Monroe Ward,LowInc,HighRent,0,LowInc,MediumDens,HighRent,Coffee Shop,Speakeasy,Café,River,College Cafeteria,College Gym,Music Venue
35,Monroe Ward,LowInc,HighRent,0,LowInc,MediumDens,HighRent,Coffee Shop,Speakeasy,Café,River,College Cafeteria,College Gym,Music Venue


In [66]:
city2_merged.loc[city2_merged['Cluster Labels'] == 0, city2_merged.columns[[1] + list(range(5, city2_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
10,Downtown,LowInc,LowRent,0,LowRent,MediumDens,LowInc,Mexican Restaurant,Coffee Shop,Bank,Rental Car Location,Harbor / Marina,Food,Department Store
10,Downtown,LowInc,LowRent,0,LowRent,MediumDens,LowInc,Mexican Restaurant,Coffee Shop,Bank,Rental Car Location,Harbor / Marina,Food,Department Store
10,Downtown,LowInc,LowRent,0,LowRent,MediumDens,LowInc,Mexican Restaurant,Coffee Shop,Bank,Rental Car Location,Harbor / Marina,Food,Department Store
17,Fremont Park,LowInc,LowRent,0,LowInc,HighDens,LowRent,Mexican Restaurant,Coffee Shop,Rental Car Location,Bank,Filipino Restaurant,Department Store,Bus Station
27,Magnolia,LowInc,LowRent,0,LowInc,HighDens,LowRent,Mexican Restaurant,Coffee Shop,Rental Car Location,Bank,Filipino Restaurant,Department Store,Bus Station
30,Midtown,LowInc,LowRent,0,LowInc,HighDens,LowRent,Mexican Restaurant,Coffee Shop,Rental Car Location,Bank,Filipino Restaurant,Department Store,Bus Station
42,Railroad Square,LowInc,LowRent,0,LowInc,HighDens,LowRent,Mexican Restaurant,Coffee Shop,Rental Car Location,Bank,Filipino Restaurant,Department Store,Bus Station
52,University Park,LowInc,LowRent,0,LowInc,HighDens,LowRent,Mexican Restaurant,Coffee Shop,Rental Car Location,Bank,Japanese Restaurant,Department Store,Bus Station
52,University Park,LowInc,LowRent,0,LowInc,HighDens,LowRent,Mexican Restaurant,Coffee Shop,Rental Car Location,Bank,Japanese Restaurant,Department Store,Bus Station


In [67]:
city1_merged.loc[city1_merged['Cluster Labels'] == 1, city1_merged.columns[[1] + list(range(5, city1_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
5,Blackwell,MediumInc,HighRent,1,MediumInc,LowDens,HighRent,Park,Caribbean Restaurant,Grocery Store,Volleyball Court,Seafood Restaurant,Harbor / Marina,Casino
10,Capital Square,MediumInc,HighRent,1,MediumInc,LowDens,HighRent,Food Truck,Music Venue,Food,Garden,Coffee Shop,Café,College Cafeteria
11,Carver,MediumInc,HighRent,1,MediumInc,HighRent,Speakeasy,Coffee Shop,Donut Shop,Nightclub,Nightlife Spot,River,Historic Site,Wine Shop
20,East Franklin Street,MediumInc,HighRent,1,MediumInc,LowDens,HighRent,Coffee Shop,Speakeasy,Café,River,College Cafeteria,College Gym,Music Venue
20,East Franklin Street,MediumInc,HighRent,1,MediumInc,LowDens,HighRent,Coffee Shop,Speakeasy,Café,River,College Cafeteria,College Gym,Music Venue
22,Fan Extension,MediumInc,HighRent,1,MediumInc,HighDens,HighRent,Speakeasy,Donut Shop,Nightclub,Nightlife Spot,River,Historic Site,Coffee Shop
28,Hollywood Cemetery,MediumInc,LowRent,1,MediumInc,LowDens,LowRent,Speakeasy,Donut Shop,Nightclub,Nightlife Spot,River,Historic Site,Coffee Shop
43,Shockoe Slip,MediumInc,HighRent,1,MediumInc,LowDens,HighRent,Food Truck,Music Venue,Food,Garden,Coffee Shop,Café,College Cafeteria
46,Tobacco Row,MediumInc,HighRent,1,MediumInc,MediumDens,HighRent,German Restaurant,Gift Shop,Coffee Shop,College Cafeteria,Music Venue,Grocery Store,Bakery
46,Tobacco Row,MediumInc,HighRent,1,MediumInc,MediumDens,HighRent,German Restaurant,Gift Shop,Coffee Shop,College Cafeteria,Music Venue,Grocery Store,Bakery


In [68]:
city2_merged.loc[city2_merged['Cluster Labels'] == 1, city2_merged.columns[[1] + list(range(5, city2_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
5,Brookside,HighInc,HighRent,1,HighInc,Pool Hall,LowDens,HighRent,Burger Joint,Bus Station,Business Service,Café,Clothing Store,College Baseball Diamond
13,Elkhorn,HighInc,HighRent,1,HighInc,LowDens,HighRent,Japanese Restaurant,Convention Center,Turkish Restaurant,Bowling Alley,Supermarket,Food,French Restaurant
13,Elkhorn,HighInc,HighRent,1,HighInc,LowDens,HighRent,Japanese Restaurant,Convention Center,Turkish Restaurant,Bowling Alley,Supermarket,Food,French Restaurant
13,Elkhorn,HighInc,HighRent,1,HighInc,LowDens,HighRent,Japanese Restaurant,Convention Center,Turkish Restaurant,Bowling Alley,Supermarket,Food,French Restaurant
14,Enclave at Spanos Park East,HighInc,HighRent,1,HighInc,LowDens,HighRent,Food,Art Gallery,Pool Hall,Coffee Shop,Bus Station,Business Service,Café
14,Enclave at Spanos Park East,HighInc,HighRent,1,HighInc,LowDens,HighRent,Food,Art Gallery,Pool Hall,Coffee Shop,Bus Station,Business Service,Café
23,La Morada,HighInc,HighRent,1,HighInc,LowDens,HighRent,Fast Food Restaurant,Big Box Store,Cosmetics Shop,Fried Chicken Joint,Pizza Place,Hardware Store,Burger Joint
46,Southwest Stockton,HighInc,HighRent,1,HighInc,LowDens,HighRent,Café,Bubble Tea Shop,Department Store,Creperie,Cosmetics Shop,Convention Center,Convenience Store
46,Southwest Stockton,HighInc,HighRent,1,HighInc,LowDens,HighRent,Café,Bubble Tea Shop,Department Store,Creperie,Cosmetics Shop,Convention Center,Convenience Store
47,Spanos Park West,HighInc,HighRent,1,HighInc,Pool Hall,LowDens,HighRent,Burger Joint,Bus Station,Business Service,Café,Clothing Store,College Baseball Diamond


In [69]:
city1_merged.loc[city1_merged['Cluster Labels'] == 2, city1_merged.columns[[1] + list(range(5, city1_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
4,Bellevue,HighInc,HighRent,2,HighInc,LowDens,HighRent,Discount Store,Art Gallery,Casino,Chinese Restaurant,Fried Chicken Joint,Southern / Soul Food Restaurant,Thrift / Vintage Store
23,Far West,HighInc,HighRent,2,HighInc,LowDens,HighRent,American Restaurant,Pizza Place,Dry Cleaner,Greek Restaurant,Gas Station,Garden,Frozen Yogurt Shop
23,Far West,HighInc,HighRent,2,HighInc,LowDens,HighRent,American Restaurant,Pizza Place,Dry Cleaner,Greek Restaurant,Gas Station,Garden,Frozen Yogurt Shop
23,Far West,HighInc,HighRent,2,HighInc,LowDens,HighRent,American Restaurant,Pizza Place,Dry Cleaner,Greek Restaurant,Gas Station,Garden,Frozen Yogurt Shop
27,Hermitage Road,HighInc,HighRent,2,HighInc,LowDens,HighRent,Discount Store,Art Gallery,Casino,Chinese Restaurant,Fried Chicken Joint,Southern / Soul Food Restaurant,Thrift / Vintage Store
29,Huguenot,HighInc,HighRent,2,HighInc,HighRent,LowDens,Park,Business Service,Dog Run,Art Gallery,Farmers Market,Music Venue,Donut Shop
29,Huguenot,HighInc,HighRent,2,HighInc,HighRent,LowDens,Park,Business Service,Dog Run,Art Gallery,Farmers Market,Music Venue,Donut Shop


In [70]:
city2_merged.loc[city2_merged['Cluster Labels'] == 2, city2_merged.columns[[1] + list(range(5, city2_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
2,Anderson,MediumInc,MediumRent,2,MediumInc,MediumRent,Pizza Place,Japanese Restaurant,Wine Shop,Bubble Tea Shop,Asian Restaurant,Gym / Fitness Center,Bowling Alley,Financial or Legal Service
2,Anderson,MediumInc,MediumRent,2,MediumInc,MediumRent,Pizza Place,Japanese Restaurant,Wine Shop,Bubble Tea Shop,Asian Restaurant,Gym / Fitness Center,Bowling Alley,Financial or Legal Service
6,Civic District,MediumInc,MediumRent,2,MediumDens,MediumRent,MediumInc,Mexican Restaurant,Bank,Coffee Shop,Gas Station,Rental Car Location,Harbor / Marina,Grocery Store
6,Civic District,MediumInc,MediumRent,2,MediumDens,MediumRent,MediumInc,Mexican Restaurant,Bank,Coffee Shop,Gas Station,Rental Car Location,Harbor / Marina,Grocery Store
6,Civic District,MediumInc,MediumRent,2,MediumDens,MediumRent,MediumInc,Mexican Restaurant,Bank,Coffee Shop,Gas Station,Rental Car Location,Harbor / Marina,Grocery Store
6,Civic District,MediumInc,MediumRent,2,MediumDens,MediumRent,MediumInc,Mexican Restaurant,Bank,Coffee Shop,Gas Station,Rental Car Location,Harbor / Marina,Grocery Store
6,Civic District,MediumInc,MediumRent,2,MediumDens,MediumRent,MediumInc,Mexican Restaurant,Bank,Coffee Shop,Gas Station,Rental Car Location,Harbor / Marina,Grocery Store
12,El Pinal,MediumInc,MediumRent,2,MediumInc,MediumDens,MediumRent,Gas Station,College Baseball Diamond,Taco Place,Business Service,Bus Station,Café,Clothing Store
12,El Pinal,MediumInc,MediumRent,2,MediumInc,MediumDens,MediumRent,Gas Station,College Baseball Diamond,Taco Place,Business Service,Bus Station,Café,Clothing Store
20,Holiday Park,MediumInc,MediumRent,2,MediumInc,MediumRent,MediumDens,Pizza Place,Japanese Restaurant,Wine Shop,Asian Restaurant,Gym / Fitness Center,Bowling Alley,Bubble Tea Shop


In [71]:
city1_merged.loc[city1_merged['Cluster Labels'] == 3, city1_merged.columns[[1] + list(range(5, city1_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
24,Forest Hill Park,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Park,Business Service,Dog Run,Art Gallery,Farmers Market,Music Venue,Donut Shop
38,Near West,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Speakeasy,Coffee Shop,Donut Shop,Garden,Nightclub,Nightlife Spot,River
38,Near West,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Speakeasy,Coffee Shop,Donut Shop,Garden,Nightclub,Nightlife Spot,River
38,Near West,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Speakeasy,Coffee Shop,Donut Shop,Garden,Nightclub,Nightlife Spot,River
42,Shockoe Bottom,HighInc,HighRent,3,HighInc,MediumDens,HighRent,German Restaurant,Bakery,Pizza Place,BBQ Joint,Playground,Grocery Store,Gift Shop
45,St. John's Church,HighInc,HighRent,3,HighInc,MediumDens,HighRent,German Restaurant,Bakery,Pizza Place,BBQ Joint,Playground,Grocery Store,Gift Shop
52,Woodland Heights,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Park,Business Service,Dog Run,Art Gallery,Farmers Market,Music Venue,Donut Shop


In [72]:
city2_merged.loc[city2_merged['Cluster Labels'] == 3, city2_merged.columns[[1] + list(range(5, city2_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
1,Akers,HighInc,MediumRent,3,HighInc,MediumDens,MediumRent,Pizza Place,Japanese Restaurant,Gym / Fitness Center,French Restaurant,Financial or Legal Service,Turkish Restaurant,Fast Food Restaurant
1,Akers,HighInc,MediumRent,3,HighInc,MediumDens,MediumRent,Pizza Place,Japanese Restaurant,Gym / Fitness Center,French Restaurant,Financial or Legal Service,Turkish Restaurant,Fast Food Restaurant
4,Bear Creek District,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Japanese Restaurant,Art Gallery,Convention Center,Turkish Restaurant,Food,French Restaurant,Clothing Store
4,Bear Creek District,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Japanese Restaurant,Art Gallery,Convention Center,Turkish Restaurant,Food,French Restaurant,Clothing Store
4,Bear Creek District,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Japanese Restaurant,Art Gallery,Convention Center,Turkish Restaurant,Food,French Restaurant,Clothing Store
7,Colonial Heights,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Mexican Restaurant,Art Gallery,Asian Restaurant,Bubble Tea Shop,Department Store,Fast Food Restaurant,Financial or Legal Service
7,Colonial Heights,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Mexican Restaurant,Art Gallery,Asian Restaurant,Bubble Tea Shop,Department Store,Fast Food Restaurant,Financial or Legal Service
7,Colonial Heights,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Mexican Restaurant,Art Gallery,Asian Restaurant,Bubble Tea Shop,Department Store,Fast Food Restaurant,Financial or Legal Service
8,Country Club,HighInc,HighRent,3,HighInc,MediumDens,HighRent,College Baseball Diamond,Business Service,Bubble Tea Shop,Department Store,Creperie,Cosmetics Shop,Convention Center
9,Creekside/Wagner,HighInc,HighRent,3,HighInc,MediumDens,HighRent,Food,Art Gallery,Pool Hall,Coffee Shop,Bus Station,Business Service,Café


In [73]:
city1_merged.loc[city1_merged['Cluster Labels'] == 4, city1_merged.columns[[1] + list(range(5, city1_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
15,Church Hill North,HighInc,MediumRent,4,HighInc,MediumRent,Gift Shop,Bakery,Pizza Place,BBQ Joint,Playground,Deli / Bodega,Grocery Store,German Restaurant
31,Laburnum Court,HighInc,MediumRent,4,HighInc,LowDens,MediumRent,Discount Store,Art Gallery,Casino,Chinese Restaurant,Fried Chicken Joint,Thrift / Vintage Store,Southern / Soul Food Restaurant
33,Midlothian,HighInc,MediumRent,4,HighInc,LowDens,MediumRent,Park,Art Gallery,Volleyball Court,Dog Run,Grocery Store,Harbor / Marina,Caribbean Restaurant
33,Midlothian,HighInc,MediumRent,4,HighInc,LowDens,MediumRent,Park,Art Gallery,Volleyball Court,Dog Run,Grocery Store,Harbor / Marina,Caribbean Restaurant
40,Old South,HighInc,MediumRent,4,HighInc,LowDens,MediumRent,Park,Art Gallery,Volleyball Court,Dog Run,Grocery Store,Harbor / Marina,Caribbean Restaurant
40,Old South,HighInc,MediumRent,4,HighInc,LowDens,MediumRent,Park,Art Gallery,Volleyball Court,Dog Run,Grocery Store,Harbor / Marina,Caribbean Restaurant
44,Springhill,HighInc,MediumRent,4,HighInc,MediumRent,LowDens,Dog Run,Park,Business Service,Art Gallery,Farmers Market,Music Venue,Donut Shop


In [74]:
city2_merged.loc[city2_merged['Cluster Labels'] == 4, city2_merged.columns[[1] + list(range(5, city2_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
26,Louis Park,LowInc,MediumRent,4,LowInc,LowDens,MediumRent,Hotel,Harbor / Marina,Smoke Shop,Convenience Store,College Baseball Diamond,Bakery,Business Service
26,Louis Park,LowInc,MediumRent,4,LowInc,LowDens,MediumRent,Hotel,Harbor / Marina,Smoke Shop,Convenience Store,College Baseball Diamond,Bakery,Business Service
28,Mariposa,LowInc,MediumRent,4,LowInc,LowDens,MediumRent,Gas Station,Taco Place,Grocery Store,Coffee Shop,Bus Station,Business Service,Café
28,Mariposa,LowInc,MediumRent,4,LowInc,LowDens,MediumRent,Gas Station,Taco Place,Grocery Store,Coffee Shop,Bus Station,Business Service,Café
38,Park District,LowInc,MediumRent,4,LowInc,LowDens,MediumRent,Gas Station,Taco Place,Grocery Store,Coffee Shop,Bus Station,Business Service,Café
38,Park District,LowInc,MediumRent,4,LowInc,LowDens,MediumRent,Gas Station,Taco Place,Grocery Store,Coffee Shop,Bus Station,Business Service,Café
38,Park District,LowInc,MediumRent,4,LowInc,LowDens,MediumRent,Gas Station,Taco Place,Grocery Store,Coffee Shop,Bus Station,Business Service,Café


In [75]:
city1_merged.loc[city1_merged['Cluster Labels'] == 5, city1_merged.columns[[1] + list(range(5, city1_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
3,Belle Isle,MediumInc,MediumRent,5,MediumInc,LowDens,MediumRent,Speakeasy,Donut Shop,Nightclub,Nightlife Spot,River,Historic Site,Coffee Shop
7,Broad Rock,MediumInc,MediumRent,5,MediumInc,LowDens,MediumRent,Park,Caribbean Restaurant,Platform,Moving Target,Volleyball Court,Seafood Restaurant,Harbor / Marina
7,Broad Rock,MediumInc,MediumRent,5,MediumInc,LowDens,MediumRent,Park,Caribbean Restaurant,Platform,Moving Target,Volleyball Court,Seafood Restaurant,Harbor / Marina
14,Church Hill Central,MediumInc,MediumRent,5,MediumInc,MediumDens,MediumRent,Gift Shop,Bakery,Playground,Grocery Store,Deli / Bodega,German Restaurant,BBQ Joint
19,East,MediumInc,MediumRent,5,MediumInc,MediumDens,MediumRent,Beach,Garden,Hotel,Grocery Store,Beach Bar,Gift Shop,Bakery
19,East,MediumInc,MediumRent,5,MediumInc,MediumDens,MediumRent,Beach,Garden,Hotel,Grocery Store,Beach Bar,Gift Shop,Bakery
19,East,MediumInc,MediumRent,5,MediumInc,MediumDens,MediumRent,Beach,Garden,Hotel,Grocery Store,Beach Bar,Gift Shop,Bakery
25,Ginter Park,MediumInc,MediumRent,5,MediumRent,MediumDens,MediumInc,Casino,Chinese Restaurant,Art Gallery,Discount Store,Fried Chicken Joint,Southern / Soul Food Restaurant,Thrift / Vintage Store
25,Ginter Park,MediumInc,MediumRent,5,MediumRent,MediumDens,MediumInc,Casino,Chinese Restaurant,Art Gallery,Discount Store,Fried Chicken Joint,Southern / Soul Food Restaurant,Thrift / Vintage Store
39,North,MediumInc,MediumRent,5,MediumRent,MediumInc,MediumDens,Speakeasy,Art Gallery,Historic Site,Casino,Chinese Restaurant,Nightclub,Nightlife Spot


In [76]:
city2_merged.loc[city2_merged['Cluster Labels'] == 5, city2_merged.columns[[1] + list(range(5, city2_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
3,August,LowInc,MediumRent,5,LowInc,MediumDens,MediumRent,Gas Station,Taco Place,College Baseball Diamond,Bus Station,Business Service,Café,Clothing Store
15,Fair Oaks,LowInc,MediumRent,5,LowInc,MediumDens,MediumRent,Gas Station,Taco Place,Grocery Store,Coffee Shop,Bus Station,Business Service,Café
15,Fair Oaks,LowInc,MediumRent,5,LowInc,MediumDens,MediumRent,Gas Station,Taco Place,Grocery Store,Coffee Shop,Bus Station,Business Service,Café
19,Gleason Park,LowInc,MediumRent,5,LowInc,MediumDens,MediumRent,Harbor / Marina,Taco Place,Convenience Store,Smoke Shop,Hotel,Bakery,Gas Station
19,Gleason Park,LowInc,MediumRent,5,LowInc,MediumDens,MediumRent,Harbor / Marina,Taco Place,Convenience Store,Smoke Shop,Hotel,Bakery,Gas Station
19,Gleason Park,LowInc,MediumRent,5,LowInc,MediumDens,MediumRent,Harbor / Marina,Taco Place,Convenience Store,Smoke Shop,Hotel,Bakery,Gas Station
32,Mormon,LowInc,MediumRent,5,LowInc,MediumDens,MediumRent,Gas Station,Taco Place,College Baseball Diamond,Bus Station,Business Service,Café,Clothing Store
55,Waterloo,LowInc,MediumRent,5,MediumRent,LowInc,MediumDens,Mexican Restaurant,Coffee Shop,Rental Car Location,Bank,Department Store,Bus Station,Burger Joint
55,Waterloo,LowInc,MediumRent,5,MediumRent,LowInc,MediumDens,Mexican Restaurant,Coffee Shop,Rental Car Location,Bank,Department Store,Bus Station,Burger Joint
60,Wilson,LowInc,MediumRent,5,LowInc,MediumDens,MediumRent,Gas Station,Taco Place,College Baseball Diamond,Bus Station,Business Service,Café,Clothing Store


In [77]:
city1_merged.loc[city1_merged['Cluster Labels'] == 6, city1_merged.columns[[1] + list(range(5, city1_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
6,Boulevard,HighInc,HighRent,6,HighInc,HighRent,Speakeasy,Nightclub,Nightlife Spot,Historic Site,Coffee Shop,River,Garden,Donut Shop
6,Boulevard,HighInc,HighRent,6,HighInc,HighRent,Speakeasy,Nightclub,Nightlife Spot,Historic Site,Coffee Shop,River,Garden,Donut Shop
13,Chimborazo Park,HighInc,HighRent,6,HighInc,HighDens,HighRent,German Restaurant,Bakery,Pizza Place,BBQ Joint,Playground,Grocery Store,Gift Shop
21,Fan,HighInc,HighRent,6,HighInc,HighDens,HighRent,Speakeasy,Coffee Shop,Donut Shop,Nightclub,Nightlife Spot,River,Historic Site
36,Monument Avenue,HighInc,HighRent,6,HighInc,HighDens,HighRent,Speakeasy,Coffee Shop,Donut Shop,Garden,Nightclub,Nightlife Spot,River
36,Monument Avenue,HighInc,HighRent,6,HighInc,HighDens,HighRent,Speakeasy,Coffee Shop,Donut Shop,Garden,Nightclub,Nightlife Spot,River
37,Museum District,HighInc,HighRent,6,HighInc,HighRent,Garden,Speakeasy,Deli / Bodega,Café,Caribbean Restaurant,Casino,Chinese Restaurant,Coffee Shop
51,West Grace Street,HighInc,HighRent,6,HighInc,HighDens,HighRent,Speakeasy,Coffee Shop,Donut Shop,Nightclub,Nightlife Spot,River,Historic Site


In [78]:
city2_merged.loc[city2_merged['Cluster Labels'] == 6, city2_merged.columns[[1] + list(range(5, city2_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
18,Garden Acres,MediumInc,MediumRent,6,MediumInc,LowDens,MediumRent,Gas Station,Taco Place,Grocery Store,Coffee Shop,Bus Station,Business Service,Café
18,Garden Acres,MediumInc,MediumRent,6,MediumInc,LowDens,MediumRent,Gas Station,Taco Place,Grocery Store,Coffee Shop,Bus Station,Business Service,Café
21,Kennedy,MediumInc,MediumRent,6,MediumInc,LowDens,MediumRent,Gas Station,Taco Place,College Baseball Diamond,Bus Station,Business Service,Café,Clothing Store
21,Kennedy,MediumInc,MediumRent,6,MediumInc,LowDens,MediumRent,Gas Station,Taco Place,College Baseball Diamond,Bus Station,Business Service,Café,Clothing Store
29,McKinley Park,MediumInc,MediumRent,6,MediumInc,LowDens,Café,MediumRent,Wine Shop,College Baseball Diamond,Bus Station,Business Service,Clothing Store,Coffee Shop
29,McKinley Park,MediumInc,MediumRent,6,MediumInc,LowDens,Café,MediumRent,Wine Shop,College Baseball Diamond,Bus Station,Business Service,Clothing Store,Coffee Shop
45,Southeast Stockton,MediumInc,MediumRent,6,MediumRent,MediumInc,LowDens,Gas Station,Café,Pizza Place,Grocery Store,Bakery,Mexican Restaurant,American Restaurant
45,Southeast Stockton,MediumInc,MediumRent,6,MediumRent,MediumInc,LowDens,Gas Station,Café,Pizza Place,Grocery Store,Bakery,Mexican Restaurant,American Restaurant
45,Southeast Stockton,MediumInc,MediumRent,6,MediumRent,MediumInc,LowDens,Gas Station,Café,Pizza Place,Grocery Store,Bakery,Mexican Restaurant,American Restaurant
45,Southeast Stockton,MediumInc,MediumRent,6,MediumRent,MediumInc,LowDens,Gas Station,Café,Pizza Place,Grocery Store,Bakery,Mexican Restaurant,American Restaurant


In [79]:
city1_merged.loc[city1_merged['Cluster Labels'] == 7, city1_merged.columns[[1] + list(range(5, city1_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
17,Court End,LowInc,HighRent,7,LowInc,LowDens,HighRent,History Museum,College Cafeteria,Fast Food Restaurant,Café,Food Truck,Music Venue,Coffee Shop
17,Court End,LowInc,HighRent,7,LowInc,LowDens,HighRent,History Museum,College Cafeteria,Fast Food Restaurant,Café,Food Truck,Music Venue,Coffee Shop
26,Grace Street Commercial,LowInc,HighRent,7,LowInc,LowDens,HighRent,Food Truck,Music Venue,Food,Garden,Coffee Shop,Café,College Cafeteria
32,Manchester,LowInc,LowRent,7,LowInc,LowDens,LowRent,Park,Caribbean Restaurant,Grocery Store,Volleyball Court,Seafood Restaurant,Harbor / Marina,Casino


In [80]:
city2_merged.loc[city2_merged['Cluster Labels'] == 7, city2_merged.columns[[1] + list(range(5, city2_merged.shape[1]))]]

Unnamed: 0,Neighborhood,MedInc,MedRent,Cluster Labels,1st Most Common Attribute,2nd Most Common Attribute,3rd Most Common Attribute,4th Most Common Attribute,5th Most Common Attribute,6th Most Common Attribute,7th Most Common Attribute,8th Most Common Attribute,9th Most Common Attribute,10th Most Common Attribute
0,Airport,MediumInc,HighRent,7,MediumInc,LowDens,HighRent,Gas Station,Grocery Store,Taco Place,Wine Shop,Bus Station,Business Service,Café
0,Airport,MediumInc,HighRent,7,MediumInc,LowDens,HighRent,Gas Station,Grocery Store,Taco Place,Wine Shop,Bus Station,Business Service,Café
0,Airport,MediumInc,HighRent,7,MediumInc,LowDens,HighRent,Gas Station,Grocery Store,Taco Place,Wine Shop,Bus Station,Business Service,Café
11,East Stockton,MediumInc,HighRent,7,HighRent,LowDens,MediumInc,Gas Station,Pizza Place,Fast Food Restaurant,Japanese Restaurant,Creperie,Cosmetics Shop,Hardware Store
11,East Stockton,MediumInc,HighRent,7,HighRent,LowDens,MediumInc,Gas Station,Pizza Place,Fast Food Restaurant,Japanese Restaurant,Creperie,Cosmetics Shop,Hardware Store
11,East Stockton,MediumInc,HighRent,7,HighRent,LowDens,MediumInc,Gas Station,Pizza Place,Fast Food Restaurant,Japanese Restaurant,Creperie,Cosmetics Shop,Hardware Store
11,East Stockton,MediumInc,HighRent,7,HighRent,LowDens,MediumInc,Gas Station,Pizza Place,Fast Food Restaurant,Japanese Restaurant,Creperie,Cosmetics Shop,Hardware Store
11,East Stockton,MediumInc,HighRent,7,HighRent,LowDens,MediumInc,Gas Station,Pizza Place,Fast Food Restaurant,Japanese Restaurant,Creperie,Cosmetics Shop,Hardware Store
11,East Stockton,MediumInc,HighRent,7,HighRent,LowDens,MediumInc,Gas Station,Pizza Place,Fast Food Restaurant,Japanese Restaurant,Creperie,Cosmetics Shop,Hardware Store
43,Seaport District,MediumInc,HighRent,7,MediumInc,LowDens,HighRent,Hotel,Café,Harbor / Marina,Smoke Shop,Convenience Store,Bakery,Bus Station
