# Final Capstone project

In [1]:
# Download beautifulsoup4 library for webscraping, if not installed
# !conda install beautifulsoup4

from bs4 import BeautifulSoup
import requests
import pandas as pd
import csv

### Scrape Wiki page using Beautiful Soup

In [2]:
pd.set_option('max_colwidth', 800)
source = requests.get('https://en.wikipedia.org/wiki/List_of_Houston_neighborhoods').text 
soup = BeautifulSoup(source, 'lxml')


### Write contents to CSV file

In [3]:
#soup

csv_file = open('Houston_codes.csv', 'w')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(['Postcode', 'Borough', 'Neighbourhood'])

32

In [4]:
table = soup.find('table', class_ = 'wikitable sortable') # Gets the table from the webpage
rows = table.find_all('tr') # Gets the table rows

postcodes = [] # Initializes the raw postcodes list
boroughs = [] # Initializes the raw boroughs list
neighbourhoods = [] # Initializes the raw neighbourhoods list

for row in rows:    
    columns = row.find_all('td')
    try :
        if columns[1].text != 'Not assigned':  # To skip if the borough name is 'Not Assigned'
            
            postcode = columns[0].text
            postcodes.append(postcode)
            
            borough = columns[1].text
            boroughs.append(borough)
            
            neighbourhood = columns[2].text.split('\n')[0] # Removing the newline character at the end     
            
            if neighbourhood == 'Not assigned': # Assigning the same name to neighbourhood if it is 'Not Assigned'
                neighbourhood = borough            
                
            neighbourhoods.append(neighbourhood)
             
    except Exception as e : # To skip the first row which contains column names
        pass 
    
postcode_explored = [] # Initializing the list of explored postcodes
for index_i, postcode_i in enumerate(postcodes) :   
    if postcode_i not in postcode_explored :
        nbds = neighbourhoods[index_i]
        for index_f, postcode_f in enumerate(postcodes) :
            if postcode_i == postcode_f and index_i != index_f:
                nbds = nbds + ', ' + neighbourhoods[index_f] # Concatenating the neighbourhood names
        csv_writer.writerow([postcode_i, boroughs[index_i], nbds]) # Writing the rows in the csv file
        postcode_explored.append(postcode_i)

In [5]:
csv_file.close()

### get df.shape 

In [6]:
df = pd.read_csv('Houston_codes.csv')
df.shape


(18, 3)

In [7]:
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,5 Corners District\r\n,"Almeda, Fondren Gardens, Winchester","Interstate 610 to the north, Beltway 8 to the south, and Texas State Highway 288 to the east"
1,Baybrook Management District\r\n,Baybrook Mall\r\n,"Farm-to-Market Road 528 to the south, Interstate 45 to the east and north"
2,Downtown District\r\n,Downtown,"Enclosed by Interstate 45, Interstate 10, and Interstate 69 (U.S. Highway 59)"
3,East Downtown Management District\r\n,East Downtown,East of Interstate 69 and north of Interstate 45
4,Generation Park Management District\r\n,Undeveloped area\r\n,"Summerwood subdivision to the north, Deussen Parkway to the east, Lake Houston Parkway to the south, Beltway 8 to the west"
5,Greater East End Management District\r\n,"East End, Magnolia Park, and Harrisburg","East of the East Downtown Management District, north of Interstate 45, south of Clinton Drive, and east of Interstate 610"
6,Greater Northside Management District\r\n,"Near Northside, part of the Heights, Independence Heights, Northline",Generally east of Interstate 45 and west of Interstate 69 from Interstate 10 north to Little York Road
7,Houston Southeast\r\n,"Third Ward, the Texas Medical Center, Riverside Terrace, South Union / OST","East of Main Street and Interstate 69, south of Interstate 45, and west of Texas State Highway 35 (Spur 5)"
8,International Management District\r\n,Alief and Little Saigon\r\n,"Westpark Tollway to the north, Beltway 8 to the east, Bissonnet Street and Bellfort Street to the south, Texas State Highway 6 to the west"
9,Memorial Management District\r\n,Memorial City,Adjacent to Interstate 10 east of Beltway 8


In [8]:
postal_codes = df['Postcode'].values


### Using Geocoder API to get geo data

In [9]:
API_KEY='65183f4caad7471ba05770b07bb594a1'
import json

latitudes = [] # Initializing the latitude array
longitudes = [] # Initializing the longitude array

for postal_code in postal_codes : 
    place_name = postal_code + " Houston, TX" # Formats the place name
    url = 'https://api.opencagedata.com/geocode/v1/json?q={}&key={}'.format(place_name, API_KEY) # Gets the proper url to make the API call
    obj = json.loads(requests.get(url).text) # Loads the JSON file in the form of a python dictionary
    
    results = obj['results'] # Extracts the results information out of the JSON file
    lat = results[0]['geometry']['lat'] # Extracts the latitude value
    lng = results[0]['geometry']['lng'] # Extracts the longitude value
    
    latitudes.append(lat) # Appending to the list of latitudes
    longitudes.append(lng) # Appending to the list of longitudes


#### Add latitude and longitude to df and run df.head to display appended columns

In [10]:
df['Latitude'] = latitudes
df['Longitude'] = longitudes
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,5 Corners District\r\n,"Almeda, Fondren Gardens, Winchester","Interstate 610 to the north, Beltway 8 to the south, and Texas State Highway 288 to the east",29.987904,-95.479542
1,Baybrook Management District\r\n,Baybrook Mall\r\n,"Farm-to-Market Road 528 to the south, Interstate 45 to the east and north",29.576304,-95.128733
2,Downtown District\r\n,Downtown,"Enclosed by Interstate 45, Interstate 10, and Interstate 69 (U.S. Highway 59)",32.782611,-96.808781
3,East Downtown Management District\r\n,East Downtown,East of Interstate 69 and north of Interstate 45,29.831391,-95.276863
4,Generation Park Management District\r\n,Undeveloped area\r\n,"Summerwood subdivision to the north, Deussen Parkway to the east, Lake Houston Parkway to the south, Beltway 8 to the west",29.907106,-95.179968
5,Greater East End Management District\r\n,"East End, Magnolia Park, and Harrisburg","East of the East Downtown Management District, north of Interstate 45, south of Clinton Drive, and east of Interstate 610",29.741647,-95.256811
6,Greater Northside Management District\r\n,"Near Northside, part of the Heights, Independence Heights, Northline",Generally east of Interstate 45 and west of Interstate 69 from Interstate 10 north to Little York Road,30.004339,-95.412106
7,Houston Southeast\r\n,"Third Ward, the Texas Medical Center, Riverside Terrace, South Union / OST","East of Main Street and Interstate 69, south of Interstate 45, and west of Texas State Highway 35 (Spur 5)",29.767424,-95.366939
8,International Management District\r\n,Alief and Little Saigon\r\n,"Westpark Tollway to the north, Beltway 8 to the east, Bissonnet Street and Bellfort Street to the south, Texas State Highway 6 to the west",29.941288,-95.327246
9,Memorial Management District\r\n,Memorial City,Adjacent to Interstate 10 east of Beltway 8,29.935417,-95.458328


### import folium for maps 

In [11]:
# Downloading folium, if not installed
!conda install -c conda-forge folium=0.5.0 --yes
import folium # Map plotting library
import numpy as np
from pandas.io.json import json_normalize # Tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# Import k-means from clustering stage
from sklearn.cluster import KMeans

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [12]:
df[['Postcode','Latitude', 'Longitude']]
df['Postcode']=df.Postcode.replace({'\r\n':''}, regex=True)
df[['Postcode','Latitude', 'Longitude']]

Unnamed: 0,Postcode,Latitude,Longitude
0,5 Corners District,29.987904,-95.479542
1,Baybrook Management District,29.576304,-95.128733
2,Downtown District,32.782611,-96.808781
3,East Downtown Management District,29.831391,-95.276863
4,Generation Park Management District,29.907106,-95.179968
5,Greater East End Management District,29.741647,-95.256811
6,Greater Northside Management District,30.004339,-95.412106
7,Houston Southeast,29.767424,-95.366939
8,International Management District,29.941288,-95.327246
9,Memorial Management District,29.935417,-95.458328


### Create Folium Map of Houston

In [13]:
# Houston latitude and longitude using Google search
tor_lat = 29.7604
tor_lng = -95.3698

# Creates map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[tor_lat, tor_lng], zoom_start=10)

#Add markers to map
for lat, lng, postcode in zip(df['Latitude'], df['Longitude'], df['Postcode']):
    label = postcode
    label = folium.Popup(label, parse_html=False)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Function to get category name

In [14]:

# Get category name

def get_category_type(row):
    try:
        categories_list = row['Category']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Using Foursquare API 

In [15]:
CLIENT_ID = 'G5W3BYQXCJCJWOSI0HQZZXGGEYA0C2AYNJ20VFIF53INMLKO' # your Foursquare ID
CLIENT_SECRET = 'PNDIFUCBRRPFK0QFQBWLPZVLMJWMIZW4ZFVAZYJM2XKA4MD0' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30

explore_df_list = []

for i, nbd_name in enumerate(df['Postcode']):  
    
    try :         
        ### Getting the data of neighbourhood
        nbd_name = df.loc[i, 'Postcode']
        nbd_lat = df.loc[i, 'Latitude']
        nbd_lng = df.loc[i, 'Longitude']

        radius = 500 # Setting the radius as 500 metres
        LIMIT = 100 # Getting the top 100 venues

        url = 'https://api.foursquare.com/v2/venues/explore?client_id={} \
        &client_secret={}&ll={},{}&v={}&radius={}&limit={}'\
        .format(CLIENT_ID, CLIENT_SECRET, nbd_lat, nbd_lng, VERSION, radius, LIMIT)

        results = json.loads(requests.get(url).text)
        results = results['response']['groups'][0]['items']

        nearby = json_normalize(results) # Flattens JSON

        # Filtering the columns
        filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
        nearby = nearby.loc[:, filtered_columns]

        # Renaming the columns
        columns = ['Name', 'Category', 'Latitude', 'Longitude']
        nearby.columns = columns

        # Gets the categories
        nearby['Category'] = nearby.apply(get_category_type, axis=1)

        # Gets the data required
        for i, name in enumerate(nearby['Name']):
            explore_df_list.append([nbd_name, nbd_lat, nbd_lng] + nearby.loc[i, :].values.tolist())
    
    except Exception as e:
        pass


### Create dataframe

In [16]:

explore_df = pd.DataFrame([item for item in explore_df_list])
explore_df.columns = ['Neighbourhood', 'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue Name', 'Venue Category', 'Venue Latitude', 'Venue Longitude']
explore_df.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue Name,Venue Category,Venue Latitude,Venue Longitude
0,5 Corners District,29.987904,-95.479542,Cici's Pizza,Pizza Place,29.990991,-95.48081
1,5 Corners District,29.987904,-95.479542,Walters Cornerstore,Convenience Store,29.988025,-95.477487
2,5 Corners District,29.987904,-95.479542,Route 420 Smoke Shop,Smoke Shop,29.988062,-95.477386
3,5 Corners District,29.987904,-95.479542,Indoor Tactical Sports Houston,Athletics & Sports,29.988415,-95.48294
4,Baybrook Management District,29.576304,-95.128733,Akkerman Engineering & Manufacturing,Business Service,29.572953,-95.130615


### One hot encoding for category types

In [17]:
toronto_onehot = pd.get_dummies(explore_df[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot['Neighbourhood'] = explore_df['Neighbourhood'] 
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]
toronto_onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Aquarium,Arts & Crafts Store,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,Bar,Breakfast Spot,...,Shopping Mall,Smoke Shop,Southern / Soul Food Restaurant,Steakhouse,Supplement Shop,Theater,Train Station,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,5 Corners District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,5 Corners District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,5 Corners District,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,5 Corners District,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Baybrook Management District,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [44]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.shape

(15, 77)

### Function for venues and getting top 5 venue types for each neighborhood

In [47]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

In [48]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# Create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# Create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,5 Corners District,Pizza Place,Athletics & Sports,Convenience Store,Smoke Shop,French Restaurant,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food
1,Baybrook Management District,Business Service,Science Museum,Department Store,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food,Food Truck,French Restaurant
2,Downtown District,History Museum,Plaza,Gift Shop,Liquor Store,Music Venue,Bakery,Pizza Place,Lounge,Italian Restaurant,French Restaurant
3,Greater East End Management District,Wine Bar,Yoga Studio,Fried Chicken Joint,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food,Food Truck,French Restaurant
4,Greater Northside Management District,Business Service,Food,Yoga Studio,Donut Shop,Fast Food Restaurant,Flower Shop,Food Truck,French Restaurant,Fried Chicken Joint,Gas Station


### K means clustering

In [49]:
kclusters = 7
toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)
kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(toronto_grouped_clustering)
kmeans.labels_[0:10] 
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

### Merge dataframes

In [50]:
toronto_merged = df
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Postcode')
toronto_merged.dropna(inplace = True)
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype(int)
toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,5 Corners District,"Almeda, Fondren Gardens, Winchester","Interstate 610 to the north, Beltway 8 to the south, and Texas State Highway 288 to the east",29.987904,-95.479542,3,Pizza Place,Athletics & Sports,Convenience Store,Smoke Shop,French Restaurant,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food
1,Baybrook Management District,Baybrook Mall\r\n,"Farm-to-Market Road 528 to the south, Interstate 45 to the east and north",29.576304,-95.128733,5,Business Service,Science Museum,Department Store,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food,Food Truck,French Restaurant
2,Downtown District,Downtown,"Enclosed by Interstate 45, Interstate 10, and Interstate 69 (U.S. Highway 59)",32.782611,-96.808781,3,History Museum,Plaza,Gift Shop,Liquor Store,Music Venue,Bakery,Pizza Place,Lounge,Italian Restaurant,French Restaurant
5,Greater East End Management District,"East End, Magnolia Park, and Harrisburg","East of the East Downtown Management District, north of Interstate 45, south of Clinton Drive, and east of Interstate 610",29.741647,-95.256811,2,Wine Bar,Yoga Studio,Fried Chicken Joint,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food,Food Truck,French Restaurant
6,Greater Northside Management District,"Near Northside, part of the Heights, Independence Heights, Northline",Generally east of Interstate 45 and west of Interstate 69 from Interstate 10 north to Little York Road,30.004339,-95.412106,4,Business Service,Food,Yoga Studio,Donut Shop,Fast Food Restaurant,Flower Shop,Food Truck,French Restaurant,Fried Chicken Joint,Gas Station


### Create clusters on maps using folium

In [51]:
# Create map
map_clusters = folium.Map(location=[tor_lat, tor_lng], zoom_start=11)

# Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Postcode'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' (Cluster ' + str(cluster) + ')', parse_html=True)
    map_clusters.add_child(
        folium.features.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7))
       
map_clusters

## Checking clusters

### Cluster 0

In [63]:
toronto_merged[toronto_merged['Cluster Labels']==0]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,International Management District,Alief and Little Saigon\r\n,"Westpark Tollway to the north, Beltway 8 to the east, Bissonnet Street and Bellfort Street to the south, Texas State Highway 6 to the west",29.941288,-95.327246,0,Hotel,American Restaurant,Breakfast Spot,Gym / Fitness Center,Diner,Intersection,BBQ Joint,Bakery,Aquarium,History Museum
13,Southwest Management District,"Sharpstown, Mahatma Gandhi District, portions of Chinatown","Westpark Tollway to the north, Hillcroft Road to the east, Bissonnet Street to the south, Beltway 8 to the west",29.93768,-95.392751,0,Hotel,Hotel Pool,New American Restaurant,Fried Chicken Joint,Gastropub,Hotel Bar,American Restaurant,Southern / Soul Food Restaurant,Food,Department Store
14,Spring Branch Management District,Spring Branch,"Tanner Road to the north, Hempstead Highway to the east, Interstate 10 to the south, and Beltway 8 to the west",29.93768,-95.392751,0,Hotel,Hotel Pool,New American Restaurant,Fried Chicken Joint,Gastropub,Hotel Bar,American Restaurant,Southern / Soul Food Restaurant,Food,Department Store


### Cluster 1

In [64]:
toronto_merged[toronto_merged['Cluster Labels']==1]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Upper Kirby District,Upper Kirby,"Westheimer Road to the north, Shepherd Drive to the east, Bissonnet Street to the south, Buffalo Speedway to the west",29.755609,-95.416672,1,Museum,Yoga Studio,French Restaurant,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food,Food Truck,Fried Chicken Joint


### Cluster 2

In [65]:
toronto_merged[toronto_merged['Cluster Labels']==2]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Greater East End Management District,"East End, Magnolia Park, and Harrisburg","East of the East Downtown Management District, north of Interstate 45, south of Clinton Drive, and east of Interstate 610",29.741647,-95.256811,2,Wine Bar,Yoga Studio,Fried Chicken Joint,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food,Food Truck,French Restaurant


### Cluster 3

In [66]:
toronto_merged[toronto_merged['Cluster Labels']==3]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,5 Corners District,"Almeda, Fondren Gardens, Winchester","Interstate 610 to the north, Beltway 8 to the south, and Texas State Highway 288 to the east",29.987904,-95.479542,3,Pizza Place,Athletics & Sports,Convenience Store,Smoke Shop,French Restaurant,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food
2,Downtown District,Downtown,"Enclosed by Interstate 45, Interstate 10, and Interstate 69 (U.S. Highway 59)",32.782611,-96.808781,3,History Museum,Plaza,Gift Shop,Liquor Store,Music Venue,Bakery,Pizza Place,Lounge,Italian Restaurant,French Restaurant
7,Houston Southeast,"Third Ward, the Texas Medical Center, Riverside Terrace, South Union / OST","East of Main Street and Interstate 69, south of Interstate 45, and west of Texas State Highway 35 (Spur 5)",29.767424,-95.366939,3,Park,Concert Hall,Seafood Restaurant,Dance Studio,Monument / Landmark,Coffee Shop,Public Art,Sculpture Garden,Aquarium,Train Station
9,Memorial Management District,Memorial City,Adjacent to Interstate 10 east of Beltway 8,29.935417,-95.458328,3,Gas Station,Vietnamese Restaurant,Intersection,Fast Food Restaurant,Market,Seafood Restaurant,Fried Chicken Joint,French Restaurant,Discount Store,Donut Shop
10,Midtown Houston,Midtown[5],"South of Interstate 45, west and north of Interstate 69",29.741415,-95.353201,3,Yoga Studio,Arts & Crafts Store,Gym / Fitness Center,Gym,Flower Shop,Bar,Food,Modern European Restaurant,French Restaurant,Donut Shop
11,Near Northwest Management District,Greater Inwood\r\n,"Tomball Parkway to the north, T. C. Jester Road to the east, Pinemont Road to the south, Hollister Road to the west",29.919558,-95.493324,3,Fast Food Restaurant,Food Truck,Discount Store,Pizza Place,Mexican Restaurant,Seafood Restaurant,Pawn Shop,Donut Shop,Mobile Phone Shop,Clothing Store
12,North Houston District,Greenspoint,Centered around the junction of Interstate 45 and Beltway 8,29.944719,-95.416074,3,Shoe Store,Pizza Place,Café,Department Store,Clothing Store,Gym,Movie Theater,Supplement Shop,Shopping Mall,Mexican Restaurant


### Cluster 4

In [67]:
toronto_merged[toronto_merged['Cluster Labels']==4]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Greater Northside Management District,"Near Northside, part of the Heights, Independence Heights, Northline",Generally east of Interstate 45 and west of Interstate 69 from Interstate 10 north to Little York Road,30.004339,-95.412106,4,Business Service,Food,Yoga Studio,Donut Shop,Fast Food Restaurant,Flower Shop,Food Truck,French Restaurant,Fried Chicken Joint,Gas Station


### Cluster 5

In [68]:
toronto_merged[toronto_merged['Cluster Labels']==5]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Baybrook Management District,Baybrook Mall\r\n,"Farm-to-Market Road 528 to the south, Interstate 45 to the east and north",29.576304,-95.128733,5,Business Service,Science Museum,Department Store,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food,Food Truck,French Restaurant


### Cluster 6 

In [69]:
toronto_merged[toronto_merged['Cluster Labels']==6]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Westchase District,Westchase,"Generally Westheimer Road to the north, Gessner Road to the east, Westpark Tollway to the south, and Wilcrest Drive to the west",29.728413,-95.551102,6,Deli / Bodega,Pizza Place,Medical Supply Store,French Restaurant,Discount Store,Donut Shop,Fast Food Restaurant,Flower Shop,Food,Food Truck
