# Week 3 Capstone Notebook

### Part 1: Data Scrape & Clean

In [2]:
# import relavent libraries
import numpy as np
import pandas as pd
from urllib.request import urlopen

#!conda install -c conda-forge bs4 --yes # only run first time
from bs4 import BeautifulSoup
import re

In [3]:
# Pull html from url
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
html = urlopen(url)

# Create BeautifulSoup object
soup = BeautifulSoup(html, 'html.parser')
type(soup)

bs4.BeautifulSoup

In [11]:
# Pull rows from object
rows = soup.find_all('tr')

# Clean rows
list_rows = []
for row in rows:
    cells = row.find_all('td')
    str_cells = str(cells)
    clean = re.compile('<.*?>')
    clean2 = (re.sub(clean, '',str_cells))
    list_rows.append(clean2)
    
# Transform into df
df = pd.DataFrame(list_rows)
df = df[0].str.split(',', expand=True)
for i in range(3):
    df[i] = df[i].str.strip('[')
    df[i] = df[i].str.strip(']')
    df[i] = df[i].str.strip('\n')
    
# add columns
col = pd.DataFrame(['Postal Code','Borough','Neighborhood']).transpose()
df = df.rename(columns=col.iloc[0])

# drop extra rows/columns
df = df.iloc[1:,:3]

# drop 'not assigned' rows
df = df[df.Borough != ' Not assigned']

# split neighborhood with ','
df['Neighborhood'] = df['Neighborhood'].str.replace(' /',',')

# reset index
df.reset_index(drop=True,inplace=True)

df.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,Garden District


In [5]:
df.shape

(107, 3)

### Part 2: Add Lat & Long

#### Part 2a: Format to the courses requests

Geocoder did not work, therefore I will use the .csv

In [15]:
# read csv into df
url = "https://cocl.us/Geospatial_data"
lon_lat_df = pd.read_csv(url)

lon_lat_df.shape

(103, 3)

In [13]:
# merge longitude and latitude into neighborhood df
ll_df = df.merge(lon_lat_df, on = 'Postal Code')
ll_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


#### Part 2b: Format data to my liking

Unhappy with the state of data so far
I am going to:
1. split the neighborhood in each borough
2. find the lat long coords for each neighborhood

In [34]:
# import libraries
#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

In [146]:
# split neighborhoods in row
df1 = df
df1 = df1['Neighborhood'].str.split(',', expand=True)
df2 = df.merge(df1, on = df.index)
df2.drop(columns = ['key_0','Neighborhood'], inplace=True)

# split neighborhoods into new rows
df_new = pd.DataFrame([])

# split hoods
for row in df2.index:
    for col in df2.columns[2:]:
        if df2.iloc[row,col+2] != None:
            df_temp = df2.iloc[row,:]
            df_temp[0] = df_temp[col]
            df_new = df_new.append(df_temp, ignore_index=True)

# clean rows
df_new['P'] = df_new['Postal Code']
df_new['B'] = df_new['Borough']
df_new['N'] = df_new[0]
df_new = df_new.iloc[:-3,-3:]
df_new = df_new.rename(columns={'P':'Postal Code','B':'Borough','N':'Neighborhood'})
#df_new['Neighborhood'] = df_new['Neighborhood'].str.replace(' ','')

df_new

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park
3,M5A,Downtown Toronto,Harbourfront
4,M6A,North York,Lawrence Manor
...,...,...,...
203,M8Z,Etobicoke,Mimico NW
204,M8Z,Etobicoke,The Queensway West
205,M8Z,Etobicoke,South of Bloor
206,M8Z,Etobicoke,Kingsway Park South West


In [228]:
# get lat & long
longitude = pd.DataFrame([])
latitude = pd.DataFrame([])

for hood in df_new['Neighborhood']:
    address = '{}, ON'.format(hood)

    geolocator = Nominatim(user_agent="toronto_explorer")
    location = geolocator.geocode(address)
    if location != None:
        long = location.longitude
        lat = location.latitude
    else:
        long = np.nan
        lat = np.nan
    longitude = longitude.append(pd.DataFrame([long]))
    latitude = latitude.append(pd.DataFrame([lat])) 

In [229]:
ll_df.shape, longitude.shape

((208, 3), (208, 1))

In [236]:
# add long & lat
longitude.reset_index(drop=True, inplace=True)
latitude.reset_index(drop=True, inplace=True)

ll_df = df_new
ll_df['Longitude'] = longitude
ll_df['Latitude'] = latitude

ll_df.shape

(208, 5)

In [252]:
# remove rows w/ nan
ll_df.dropna(inplace=True)
ll_df.reset_index(drop=True, inplace=True)

ll_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Longitude,Latitude
0,M3A,North York,Parkwoods,-79.320197,43.7588
1,M4A,North York,Victoria Village,-79.311189,43.732658
2,M5A,Downtown Toronto,Regent Park,-79.360457,43.660706
3,M5A,Downtown Toronto,Harbourfront,-79.38015,43.64008
4,M6A,North York,Lawrence Manor,-79.437507,43.722079


In [292]:
fin_list = [' Central Toronto', ' Downtown Toronto',' East Toronto',' West Toronto']
tor_df = pd.DataFrame([])
ll_df.loc[2,'Borough']

for row in ll_df.index:
    if ll_df.loc[row,'Latitude'] >= 43:
        if ll_df.loc[row,'Latitude'] <= 45:
            if ll_df.loc[row,'Longitude'] >= -80:
                if ll_df.loc[row,'Longitude'] <= -78:
                    tor_df = tor_df.append(ll_df.loc[row])
        
tor_df.shape

(165, 5)

In [298]:
tor_df.drop(columns = ['Postal Code'], inplace=True)
tor_df.reset_index(drop=True, inplace=True)
tor_df.head()

Unnamed: 0,Borough,Latitude,Longitude,Neighborhood
0,North York,43.7588,-79.320197,Parkwoods
1,North York,43.732658,-79.311189,Victoria Village
2,Downtown Toronto,43.660706,-79.360457,Regent Park
3,Downtown Toronto,43.64008,-79.38015,Harbourfront
4,North York,43.722079,-79.437507,Lawrence Manor


### Part 3: Cluster & Visualize

In [246]:
# import libraries
import folium

In [247]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
longitude = location.longitude
latitude = location.latitude

In [299]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(tor_df['Latitude'], tor_df['Longitude'], tor_df['Borough'], tor_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  

In [389]:
# display map
map_toronto

### Access foursquare data

In [307]:
import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

In [304]:
# initialize for Foursqyare API
CLIENT_ID = '2LGT5DBF3CF1KCWNVP3GLC0M5MSCP330DZLZK224AJSFERY5' # your Foursquare ID
CLIENT_SECRET = '0VNMBF0U12VI1HAZUB1JG4BFXQWY2D2YU1VVBZQOWXA0I2LF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100
radius = 500

In [305]:
# function to pull nearby data from Foursquare
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [308]:
# run to pull all Toronto Venues
toronto_venues = getNearbyVenues(names = tor_df['Neighborhood'],
                          latitudes = tor_df['Latitude'],
                          longitudes = tor_df['Longitude']
                         )



 Parkwoods
 Victoria Village
 Regent Park
 Harbourfront
 Lawrence Manor
 Lawrence Heights
 Queen's Park
 Islington Avenue
 Malvern
 Rouge
 Don Mills
 Woodbine Gardens
 Garden District
 Glencairn
 West Deane Park
 Princess Gardens
 Martin Grove
 Islington
 Rouge Hill
 Port Union
 Highland Creek
 Don Mills
 Woodbine Heights
 St. James Town
 Eringate
 Bloordale Gardens
 Old Burnhamthorpe
 Markland Wood
 Guildwood
 West Hill
 The Beaches
 Berczy Park
 Woburn
 Leaside
 Christie
 Cedarbrae
 Hillcrest Village
 Bathurst Manor
 Wilson Heights
 Downsview North
 Thorncliffe Park
 King
 Dovercourt Village
 Scarborough Village
 Henry Farm
 Oriole
 Northwood Park
 York University
 East Toronto
 Harbourfront East
 Union Station
 Toronto Islands
 Little Portugal
 Kennedy Park
 Ionview
 East Birchmount Park
 Bayview Village
 Downsview
 The Danforth West
 Riverdale
 Toronto Dominion Centre
 Design Exchange
 Brockton
 Parkdale Village
 Exhibition Place
 Golden Mile
 Clairlea
 Oakridge
 York Mills
 Silver

In [310]:
print(toronto_venues.shape)
toronto_venues.head()

(4331, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.7588,-79.320197,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.7588,-79.320197,LCBO,43.757774,-79.314257,Liquor Store
2,Parkwoods,43.7588,-79.320197,Petro-Canada,43.75795,-79.315187,Gas Station
3,Parkwoods,43.7588,-79.320197,Shoppers Drug Mart,43.760857,-79.324961,Pharmacy
4,Parkwoods,43.7588,-79.320197,Pizza Pizza,43.760231,-79.325666,Pizza Place


### Analyze Venue Data

In [338]:
# one hot encoding
t_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
t_onehot.drop(columns='Neighborhood',inplace=True)
t_onehot.shape

(4331, 308)

In [339]:
# save to move neighborhood column to the first column
fixed_columns = ['Neighborhood'] + list(t_onehot.columns[:])

# add neighborhood column back to dataframe
t_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
t_onehot = t_onehot[fixed_columns]

t_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Service,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [340]:
# get % for comparisons
t_grouped = t_onehot.groupby('Neighborhood').mean().reset_index()
t_grouped

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Service,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Art Gallery,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0,0.0,0.00,...,0.00,0.0,0.00,0.083333,0.0,0.0,0.0,0.000000,0.00,0.00
1,Agincourt North,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0,0.0,0.00,...,0.00,0.0,0.00,0.037037,0.0,0.0,0.0,0.037037,0.00,0.00
2,Alderwood,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0,0.0,0.00,...,0.00,0.0,0.00,0.000000,0.0,0.0,0.0,0.000000,0.00,0.00
3,Bathurst Manor,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0,0.0,0.00,...,0.00,0.0,0.00,0.000000,0.0,0.0,0.0,0.000000,0.00,0.00
4,Bathurst Quay,0.0,0.0,0.041667,0.041667,0.00,0.0,0.0,0.0,0.00,...,0.00,0.0,0.00,0.000000,0.0,0.0,0.0,0.000000,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149,Woodbine Heights,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0,0.0,0.00,...,0.00,0.0,0.00,0.000000,0.0,0.0,0.0,0.000000,0.00,0.00
150,York Mills,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0,0.0,0.00,...,0.00,0.0,0.00,0.000000,0.0,0.0,0.0,0.000000,0.00,0.00
151,York Mills West,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0,0.0,0.00,...,0.00,0.0,0.00,0.000000,0.0,0.0,0.0,0.000000,0.00,0.00
152,York University,0.0,0.0,0.000000,0.000000,0.00,0.0,0.0,0.0,0.00,...,0.00,0.0,0.00,0.000000,0.0,0.0,0.0,0.000000,0.00,0.00


In [342]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [379]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = t_grouped['Neighborhood']

for ind in np.arange(t_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(t_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Asian Restaurant,Korean Restaurant,Hong Kong Restaurant,Food Court,Vietnamese Restaurant,Cantonese Restaurant,Coffee Shop,Shopping Mall,Train Station
1,Agincourt North,Chinese Restaurant,Bank,Beer Store,Restaurant,Movie Theater,Clothing Store,Frozen Yogurt Shop,Fried Chicken Joint,Bakery,Coffee Shop
2,Alderwood,Pizza Place,Pharmacy,Gym,Dance Studio,Pub,Sandwich Place,Skating Rink,Coffee Shop,Event Space,Donut Shop
3,Bathurst Manor,Playground,Convenience Store,Baseball Field,Park,Fast Food Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
4,Bathurst Quay,Coffee Shop,Café,Park,Bank,Diner,Sushi Restaurant,Rental Car Location,Caribbean Restaurant,Garden,Ramen Restaurant


### Cluster Data

In [380]:
# set number of clusters
kclusters = 6

t_grouped_clustering = t_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(t_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_


array([1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 3, 1, 1, 5, 5, 1,
       0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       3, 0, 0, 0, 0, 1, 1, 1, 1, 3, 1, 1, 1, 1, 4, 1, 1, 0, 3, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
      dtype=int32)

In [381]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

t_merged = tor_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
t_merged = t_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

t_merged.head() # check the last columns!

Unnamed: 0,Borough,Latitude,Longitude,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,43.7588,-79.320197,Parkwoods,1.0,Convenience Store,Bank,Pharmacy,Discount Store,Coffee Shop,Chinese Restaurant,Caribbean Restaurant,Shopping Mall,Bus Line,Liquor Store
1,North York,43.732658,-79.311189,Victoria Village,0.0,Spa,Middle Eastern Restaurant,Thai Restaurant,Asian Restaurant,Bus Line,Park,Fish Market,Fish & Chips Shop,Filipino Restaurant,Flea Market
2,Downtown Toronto,43.660706,-79.360457,Regent Park,1.0,Coffee Shop,Thai Restaurant,Pub,Auto Dealership,Electronics Store,Restaurant,Beer Store,Food Truck,Pharmacy,Indian Restaurant
3,Downtown Toronto,43.64008,-79.38015,Harbourfront,1.0,Coffee Shop,Café,Restaurant,Hotel,Italian Restaurant,Bank,Chinese Restaurant,Plaza,Pizza Place,Sushi Restaurant
4,North York,43.722079,-79.437507,Lawrence Manor,0.0,Electronics Store,Kids Store,Doctor's Office,Park,Bank,Yoga Studio,Farmers Market,Ethiopian Restaurant,Event Space,Falafel Restaurant


In [382]:
t_merged.dropna(inplace=True)

In [383]:
t_merged.describe()

Unnamed: 0,Latitude,Longitude,Cluster Labels
count,162.0,162.0,162.0
mean,43.701375,-79.400433,1.030864
std,0.062462,0.112251,0.742857
min,43.503584,-79.749472,0.0
25%,43.649826,-79.475998,1.0
50%,43.698738,-79.396142,1.0
75%,43.752073,-79.314538,1.0
max,43.823174,-79.130499,5.0


In [384]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(t_merged['Latitude'], t_merged['Longitude'], t_merged['Neighborhood'], t_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)

In [387]:
map_clusters

In [386]:
t_merged.groupby('Cluster Labels').count()

Unnamed: 0_level_0,Borough,Latitude,Longitude,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Cluster Labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0.0,21,21,21,21,21,21,21,21,21,21,21,21,21,21
1.0,129,129,129,129,129,129,129,129,129,129,129,129,129,129
2.0,3,3,3,3,3,3,3,3,3,3,3,3,3,3
3.0,6,6,6,6,6,6,6,6,6,6,6,6,6,6
4.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1
5.0,2,2,2,2,2,2,2,2,2,2,2,2,2,2


### Cluster 1

In [388]:
t_merged.loc[t_merged['Cluster Labels'] == 1, t_merged.columns[[1] + list(range(5, t_merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,43.758800,Convenience Store,Bank,Pharmacy,Discount Store,Coffee Shop,Chinese Restaurant,Caribbean Restaurant,Shopping Mall,Bus Line,Liquor Store
2,43.660706,Coffee Shop,Thai Restaurant,Pub,Auto Dealership,Electronics Store,Restaurant,Beer Store,Food Truck,Pharmacy,Indian Restaurant
3,43.640080,Coffee Shop,Café,Restaurant,Hotel,Italian Restaurant,Bank,Chinese Restaurant,Plaza,Pizza Place,Sushi Restaurant
5,43.722778,Clothing Store,Coffee Shop,Restaurant,Sporting Goods Shop,Accessories Store,Women's Store,Electronics Store,Chocolate Shop,Toy / Game Store,Furniture / Home Store
6,43.659659,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Vegetarian / Vegan Restaurant,Chinese Restaurant,Ice Cream Shop,Restaurant,Japanese Restaurant,Bubble Tea Shop
...,...,...,...,...,...,...,...,...,...,...,...
160,43.503584,Pizza Place,Park,Hotel,American Restaurant,Portuguese Restaurant,Coffee Shop,Fast Food Restaurant,Gas Station,Asian Restaurant,Eastern European Restaurant
161,43.616677,Bakery,Bar,Café,Skating Rink,Farmers Market,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
162,43.574043,Coffee Shop,Deli / Bodega,Sushi Restaurant,Park,Trail,Italian Restaurant,Yoga Studio,Electronics Store,Ethiopian Restaurant,Event Space
163,43.503584,Pizza Place,Park,Hotel,American Restaurant,Portuguese Restaurant,Coffee Shop,Fast Food Restaurant,Gas Station,Asian Restaurant,Eastern European Restaurant
