## IBM Applied Data Science Capstone
### The Battle of Neighborhoods: Venue Comparison in the Dallas-Fort Worth Metroplex

In [1]:
# Import libaries

# Library to handle data in a vectorized manner
import numpy as np

# Library for data analysis
!conda install -c conda-forge geopy --yes
import pandas as pd

# Library to handle JSON files
import json 

# Tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize

# Convert an address into latitude and longitude values
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

# Library to handle requests
import requests

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# Import k-means from clustering stage
from sklearn.cluster import KMeans

# Map rendering library
!conda install -c conda-forge folium=0.10.1 --yes 
import folium

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.0

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    scikit-learn-0.20.1        |   py36h22eb022_0         5.7 MB
    liblapack-3.8.0            |      11_openblas          10 KB  conda-forge
    liblapacke-3.8.0           |      11_openblas          10 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    libopenblas-0.3.6          |       h5a2b251_2         7.7 MB
    numpy-1.17.3               |   py36h95a1406_0         5.2 MB  conda-forge
    scipy-1.4.1                |   py36h921218d_0        

# Webscraping data

In [2]:
# Create Dallas dataframe
csv_url_dallas = 'https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/download/?format=csv&q=dallas&refine.state=TX&timezone=America/Chicago&use_labels_for_header=true&csv_separator=%3B'
df_dallas = pd.read_csv(csv_url_dallas, sep = ';', engine = 'python')

# Drop unneeded columns
df_dallas.drop(['State', 'Timezone', 'Daylight savings time flag', 'geopoint'], axis = 1, inplace = True)

#Rename column
df_dallas.columns = ['Zipcode', 'City', 'Latitude', 'Longitude']
df_dallas

Unnamed: 0,Zipcode,City,Latitude,Longitude
0,75294,Dallas,32.767268,-96.777626
1,75255,Dallas,32.669783,-96.614921
2,75374,Dallas,32.767268,-96.777626
3,75252,Dallas,32.998132,-96.790880
4,75275,Dallas,32.767268,-96.777626
...,...,...,...,...
118,75233,Dallas,32.704398,-96.872220
119,75320,Dallas,32.767268,-96.777626
120,75380,Dallas,32.767268,-96.777626
121,75277,Dallas,32.767268,-96.777626


In [3]:
# Get names of indexes where Latitude = 32.767268
indexnames = df_dallas[df_dallas['Latitude'] == 32.767268].index
# Drop those rows
df_dallas.drop(indexnames, inplace = True)

# Get names of indexes where City = Lake Dallas
indexnames = df_dallas[df_dallas['City'] == 'Lake Dallas'].index
# Drop those rows
df_dallas.drop(indexnames, inplace = True)

df_dallas

Unnamed: 0,Zipcode,City,Latitude,Longitude
1,75255,Dallas,32.669783,-96.614921
3,75252,Dallas,32.998132,-96.79088
5,75202,Dallas,32.77988,-96.80502
10,75270,Dallas,32.78133,-96.80198
11,75220,Dallas,32.867977,-96.86306
12,75234,Dallas,32.925975,-96.88322
14,75215,Dallas,32.76103,-96.77035
15,75231,Dallas,32.874317,-96.74764
16,75251,Dallas,32.919104,-96.77497
20,75214,Dallas,32.825628,-96.74872


In [4]:
print('The shape of the Dallas dataframe is', df_dallas.shape)

The shape of the Dallas dataframe is (52, 4)


In [5]:
# Create Fort Worth dataframe
csv_url_fortworth = 'https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/download/?format=csv&q=fort+worth&refine.state=TX&timezone=America/Chicago&use_labels_for_header=true&csv_separator=%3B'
df_fortworth = pd.read_csv(csv_url_fortworth, sep = ';', engine = 'python')

# Drop columns
df_fortworth.drop(['State', 'Timezone', 'Daylight savings time flag', 'geopoint'], axis = 1, inplace = True)

#Rename column
df_fortworth.columns = ['Zipcode', 'City', 'Latitude', 'Longitude']
df_fortworth

Unnamed: 0,Zipcode,City,Latitude,Longitude
0,76107,Fort Worth,32.738481,-97.38424
1,76179,Fort Worth,32.876475,-97.41249
2,76137,Fort Worth,32.86814,-97.28566
3,76345,Fort Worth,32.38253,-98.404816
4,76177,Fort Worth,32.949819,-97.31406
5,76129,Fort Worth,32.771419,-97.291484
6,76191,Fort Worth,32.771419,-97.291484
7,76114,Fort Worth,32.781329,-97.40099
8,76199,Fort Worth,32.771419,-97.291484
9,76103,Fort Worth,32.745681,-97.26563


In [6]:
# Get names of indexes where Latitude = 32.771419
indexnames = df_fortworth[df_fortworth['Latitude'] == 32.771419].index
# Drop those rows
df_fortworth.drop(indexnames, inplace = True)
df_fortworth

Unnamed: 0,Zipcode,City,Latitude,Longitude
0,76107,Fort Worth,32.738481,-97.38424
1,76179,Fort Worth,32.876475,-97.41249
2,76137,Fort Worth,32.86814,-97.28566
3,76345,Fort Worth,32.38253,-98.404816
4,76177,Fort Worth,32.949819,-97.31406
7,76114,Fort Worth,32.781329,-97.40099
9,76103,Fort Worth,32.745681,-97.26563
10,76118,Fort Worth,32.80533,-97.22264
11,76110,Fort Worth,32.706331,-97.33787
13,76119,Fort Worth,32.691033,-97.26479


In [7]:
print('The shape of the Fort Worth dataframe is', df_fortworth.shape)

The shape of the Fort Worth dataframe is (32, 4)


In [8]:
# Concatenate Dallas and Fort Worth dataframes
df_dfw = pd.concat([df_dallas, df_fortworth], ignore_index = True)
print('The shape of the combined Dallas and Fort Worth dataframes is', df_dfw.shape)
df_dfw

The shape of the combined Dallas and Fort Worth dataframes is (84, 4)


Unnamed: 0,Zipcode,City,Latitude,Longitude
0,75255,Dallas,32.669783,-96.614921
1,75252,Dallas,32.998132,-96.790880
2,75202,Dallas,32.779880,-96.805020
3,75270,Dallas,32.781330,-96.801980
4,75220,Dallas,32.867977,-96.863060
...,...,...,...,...
79,76133,Fort Worth,32.655401,-97.377650
80,76131,Fort Worth,32.873017,-97.346220
81,76111,Fort Worth,32.778479,-97.300990
82,76112,Fort Worth,32.744032,-97.219570


# Acquiring data through Foursquare API

In [33]:
# Client ID and Secret removed for sharing
CLIENT_ID = 'Insert ID' # Foursquare ID
CLIENT_SECRET = 'Insert Secret' # Foursquare Secret
VERSION = '20180604'
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: Insert ID
CLIENT_SECRET:Insert Secret


In [10]:
# Limit of number of venues returned by Foursquare API
LIMIT = 150
# Define radius
radius = 1000
def getNearbyVenues(names, cities, latitudes, longitudes):
    
    venues_list = []
    for name, city, lat, lng in zip(names, cities, latitudes, longitudes):
        print(name)
            
        # Create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # Make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # Return only relevant information for each nearby venue
        venues_list.append([(
            city,
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['City',
                             'Zipcode', 
                             'Zipcode Latitude', 
                             'Zipcode Longitude', 
                             'Venue', 
                             'Venue Latitude', 
                             'Venue Longitude', 
                             'Venue Category']
    
    return(nearby_venues)

In [11]:
# Retrieving venues in Zipcode regions
dfw_venues = getNearbyVenues(names = df_dfw['Zipcode'],
                             cities = df_dfw['City'],
                             latitudes = df_dfw['Latitude'],
                             longitudes = df_dfw['Longitude']
                            )
print('dfw_venues shape', dfw_venues.shape)
dfw_venues.head()

75255
75252
75202
75270
75220
75234
75215
75231
75251
75214
75210
75246
75216
75238
75247
75207
75223
75287
75217
75212
75241
75244
75253
75245
75204
75226
75205
75230
75254
75221
75237
75211
75218
75203
75219
75206
75235
75243
75224
75228
75209
75201
75229
75232
75240
75249
75236
75248
75225
75208
75233
75227
76107
76179
76137
76345
76177
76114
76103
76118
76110
76119
76120
76115
76132
76148
76153
76102
76123
76109
76105
76104
76140
76106
76134
76126
76155
76108
76116
76133
76131
76111
76112
76135
dfw_venues shape (2383, 8)


Unnamed: 0,City,Zipcode,Zipcode Latitude,Zipcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Dallas,75255,32.669783,-96.614921,Sid's Food Mart,32.669854,-96.614021,Deli / Bodega
1,Dallas,75255,32.669783,-96.614921,Compressors Unlimited International,32.666678,-96.613758,Home Service
2,Dallas,75255,32.669783,-96.614921,Los Potrillos,32.669117,-96.609795,Mexican Restaurant
3,Dallas,75252,32.998132,-96.79088,Starbucks,32.998742,-96.794237,Coffee Shop
4,Dallas,75252,32.998132,-96.79088,Jamba Juice,32.998554,-96.794633,Juice Bar


In [12]:
# Amount of unique categories 
print('There are {} uniques categories.'.format(len(dfw_venues['Venue Category'].unique())))

There are 269 uniques categories.


In [13]:
# Most frequent categories
dfw_venues.groupby('Venue Category').count().sort_values('Venue', ascending = False)

Unnamed: 0_level_0,City,Zipcode,Zipcode Latitude,Zipcode Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Mexican Restaurant,113,113,113,113,113,113,113
Fast Food Restaurant,105,105,105,105,105,105,105
Pizza Place,73,73,73,73,73,73,73
Coffee Shop,67,67,67,67,67,67,67
Convenience Store,64,64,64,64,64,64,64
...,...,...,...,...,...,...,...
Laundry Service,1,1,1,1,1,1,1
Accessories Store,1,1,1,1,1,1,1
Jewelry Store,1,1,1,1,1,1,1
Irish Pub,1,1,1,1,1,1,1


In [14]:
# One hot encoding
dfw_venues_onehot = pd.get_dummies(dfw_venues[['Venue Category']], prefix = "", prefix_sep = "")

# Add Zipcode column back to dataframe
col1 = dfw_venues['City']
col2 = dfw_venues['Zipcode'] 
 
# Move Zipcode column to the first column
dfw_venues_onehot.insert(0, 'Zipcode', col2)
dfw_venues_onehot.insert(0, 'City', col1)

dfw_venues_onehot

Unnamed: 0,City,Zipcode,ATM,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Arcade,Art Gallery,...,Warehouse Store,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Dallas,75255,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Dallas,75255,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Dallas,75255,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Dallas,75252,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Dallas,75252,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2378,Fort Worth,76135,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2379,Fort Worth,76135,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2380,Fort Worth,76135,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2381,Fort Worth,76135,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
# Group rows by Zipcode and take the mean of the frequency of occurrence of each category
dfw_venues_grouped = dfw_venues_onehot.groupby(['City', 'Zipcode']).mean().reset_index()
dfw_venues_grouped

Unnamed: 0,City,Zipcode,ATM,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Arcade,Art Gallery,...,Warehouse Store,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,Dallas,75201,0.0,0.0,0.0,0.060000,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.020000,0.0,0.00
1,Dallas,75202,0.0,0.0,0.0,0.030000,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.01,0.000000,0.000000,0.0,0.00
2,Dallas,75203,0.0,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.05
3,Dallas,75204,0.0,0.0,0.0,0.045455,0.0,0.00,0.0,0.0,...,0.0,0.0,0.0,0.030303,0.0,0.00,0.000000,0.030303,0.0,0.00
4,Dallas,75205,0.0,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.034483,0.000000,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,Fort Worth,76148,0.0,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00
79,Fort Worth,76153,0.0,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00
80,Fort Worth,76155,0.0,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00
81,Fort Worth,76177,0.0,0.0,0.0,0.000000,0.0,0.00,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.00,0.000000,0.000000,0.0,0.00


In [16]:
# View each Zipcode region with top 5 venues
num_top_venues = 5

for hood in dfw_venues_grouped['Zipcode']:
    print("----"+str(hood)+"----")
    temp = dfw_venues_grouped[dfw_venues_grouped['Zipcode'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending = False).reset_index(drop = True).head(num_top_venues))
    print('\n')

----75201----
                     venue      freq
0                  Zipcode  75201.00
1                    Hotel      0.07
2      American Restaurant      0.06
3  New American Restaurant      0.05
4              Coffee Shop      0.05


----75202----
                venue      freq
0             Zipcode  75202.00
1               Hotel      0.12
2  Mexican Restaurant      0.05
3               Plaza      0.04
4         Coffee Shop      0.04


----75203----
                  venue      freq
0               Zipcode  75203.00
1    Light Rail Station      0.15
2             Gift Shop      0.10
3  Fast Food Restaurant      0.10
4           Bus Station      0.05


----75204----
                  venue      freq
0               Zipcode  75204.00
1           Coffee Shop      0.09
2    Mexican Restaurant      0.05
3   American Restaurant      0.05
4  Fast Food Restaurant      0.05


----75205----
                venue      freq
0             Zipcode  75205.00
1      Clothing Store      0.14
2   

In [17]:
# Sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[2:]
    row_categories_sorted = row_categories.sort_values(ascending = False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [18]:
# Create new dataframe and display the top 10 venues for each neighborhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Create columns according to number of top venues
columns = ['City', 'Zipcode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind + 1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind + 1))

# Create a new dataframe
zipcode_venues_sorted = pd.DataFrame(columns = columns)
#zipcode_venues_sorted['City'] = dfw_venues_grouped['City']
zipcode_venues_sorted['City'] = dfw_venues_grouped['City']
zipcode_venues_sorted['Zipcode'] = dfw_venues_grouped['Zipcode']

for ind in np.arange(dfw_venues_grouped.shape[0]):
    zipcode_venues_sorted.iloc[ind, 2:] = return_most_common_venues(dfw_venues_grouped.iloc[ind, :], num_top_venues)

zipcode_venues_sorted.head()

Unnamed: 0,City,Zipcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Dallas,75201,Hotel,American Restaurant,New American Restaurant,Steakhouse,Coffee Shop,Food Truck,Japanese Restaurant,Performing Arts Venue,Mediterranean Restaurant,Mexican Restaurant
1,Dallas,75202,Hotel,Mexican Restaurant,Coffee Shop,Steakhouse,Cocktail Bar,Plaza,Park,History Museum,American Restaurant,French Restaurant
2,Dallas,75203,Light Rail Station,Fast Food Restaurant,Gift Shop,Taco Place,Mexican Restaurant,Gas Station,Paper / Office Supplies Store,Home Service,Food,Zoo Exhibit
3,Dallas,75204,Coffee Shop,Convenience Store,Fast Food Restaurant,Mexican Restaurant,American Restaurant,Restaurant,Park,Pizza Place,Sports Bar,Pharmacy
4,Dallas,75205,Clothing Store,Boutique,Golf Course,Athletics & Sports,Bank,Men's Store,Gym / Fitness Center,Gym,Grocery Store,Steakhouse


# k-means clustering

In [19]:
# Set number of clusters
kclusters = 8

dfw_grouped_clustering = dfw_venues_grouped.drop(['City','Zipcode'], 1)

# Run k-means clustering
kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(dfw_grouped_clustering)

# Check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 2, 0, 2, 2, 2, 0, 2, 2, 0], dtype=int32)

In [20]:
# Add clustering labels
# Uncomment to add cluster labels
zipcode_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

dfw_merged = df_dfw

# Merge dfw_grouped with dfw_data to add latitude/longitude for each Zipcode
dfw_merged_all = dfw_merged.join(zipcode_venues_sorted.set_index(['City', 'Zipcode']), on = ['City', 'Zipcode'])

dfw_merged_all

# Drop rows with NaN values
dfw_merged_all.dropna(inplace = True)
dfw_merged_all['Cluster Labels'] = dfw_merged_all['Cluster Labels'].astype(int)
dfw_merged_all

Unnamed: 0,Zipcode,City,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,75255,Dallas,32.669783,-96.614921,1,Deli / Bodega,Home Service,Mexican Restaurant,Food Truck,Flower Shop,Fondue Restaurant,Food,Food Court,Food Service,French Restaurant
1,75252,Dallas,32.998132,-96.790880,2,Mexican Restaurant,Sandwich Place,Nail Salon,Gym / Fitness Center,Mediterranean Restaurant,Bank,Convenience Store,Coffee Shop,Southern / Soul Food Restaurant,Burger Joint
2,75202,Dallas,32.779880,-96.805020,2,Hotel,Mexican Restaurant,Coffee Shop,Steakhouse,Cocktail Bar,Plaza,Park,History Museum,American Restaurant,French Restaurant
3,75270,Dallas,32.781330,-96.801980,2,Hotel,Coffee Shop,Mexican Restaurant,Plaza,Steakhouse,Park,Cocktail Bar,Bar,Gym,History Museum
4,75220,Dallas,32.867977,-96.863060,6,Mexican Restaurant,Pizza Place,Grocery Store,Convenience Store,Business Service,Fast Food Restaurant,Bank,Chinese Restaurant,Seafood Restaurant,Mobile Phone Shop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,76133,Fort Worth,32.655401,-97.377650,0,Fast Food Restaurant,Breakfast Spot,Donut Shop,Fried Chicken Joint,Park,Gym / Fitness Center,Pawn Shop,Gymnastics Gym,Baseball Field,Chinese Restaurant
80,76131,Fort Worth,32.873017,-97.346220,0,Sandwich Place,Coffee Shop,Home Service,Mobile Phone Shop,Mexican Restaurant,Grocery Store,Pharmacy,Fast Food Restaurant,Market,Salon / Barbershop
81,76111,Fort Worth,32.778479,-97.300990,6,Mexican Restaurant,Pharmacy,Taco Place,Fast Food Restaurant,Bakery,Bar,Café,Cajun / Creole Restaurant,Park,Sandwich Place
82,76112,Fort Worth,32.744032,-97.219570,0,Pizza Place,Discount Store,Fast Food Restaurant,Pharmacy,Grocery Store,Fried Chicken Joint,Supermarket,Bookstore,Food,Sandwich Place


# Visualization

In [21]:
# Retrieve Dallas coordiantes
dallas_address = 'Dallas, TX'
geolocator = Nominatim(user_agent="dfw_explorer")
dallas_location = geolocator.geocode(dallas_address)
dallas_latitude = dallas_location.latitude
dallas_longitude = dallas_location.longitude
print('The geograpical coordinate of Dallas are {}, {}.'.format(dallas_latitude, dallas_longitude))

# Retrieve Fort Worth coordiantes
fortworth_address = 'Fort Worth, TX'
fortworth_location = geolocator.geocode(fortworth_address)
fortworth_latitude = fortworth_location.latitude
fortworth_longitude = fortworth_location.longitude
print('The geograpical coordinate of Fort Worth are {}, {}.'.format(fortworth_latitude, fortworth_longitude))

The geograpical coordinate of Dallas are 32.7762719, -96.7968559.
The geograpical coordinate of Fort Worth are 32.753177, -97.3327459.


In [22]:
dallas_merged_all = dfw_merged_all[dfw_merged_all['City'] == 'Dallas']
fortworth_merged_all = dfw_merged_all[dfw_merged_all['City'] == 'Fort Worth']

In [23]:
# Create Dallas map
kclusters = 8
map_clusters = folium.Map(location = [dallas_latitude, dallas_longitude], zoom_start = 11)

# Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i * x) ** 2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dallas_merged_all["Latitude"], dallas_merged_all["Longitude"],dallas_merged_all["Zipcode"], dallas_merged_all["Cluster Labels"]):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster + 1), parse_html = True)
    folium.CircleMarker(
        [lat, lon],
        radius = 15,
        popup = label,
        color = rainbow[cluster-1],
        fill = True,
        fill_color = rainbow[cluster-1],
        fill_opacity = 0.7).add_to(map_clusters)

map_clusters

In [24]:
# Create Fort Worth map
kclusters = 8
map_clusters = folium.Map(location = [fortworth_latitude, fortworth_longitude], zoom_start=11)

# Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i * x) ** 2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(fortworth_merged_all['Latitude'], fortworth_merged_all['Longitude'], fortworth_merged_all['Zipcode'], fortworth_merged_all['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster + 1), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius = 15,
        popup = label,
        color = rainbow[cluster-1],
        fill = True,
        fill_color = rainbow[cluster-1],
        fill_opacity = 0.7).add_to(map_clusters)

map_clusters

In [25]:
#Cluster 1
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 0, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,75234,32.925975,Baseball Field,Breakfast Spot,Mexican Restaurant,Pizza Place,Soccer Field,Thrift / Vintage Store,Sandwich Place,Big Box Store,Chinese Restaurant,Grocery Store
7,75231,32.874317,Baseball Field,Pizza Place,Video Game Store,Movie Theater,Mexican Restaurant,Fast Food Restaurant,Mobile Phone Shop,Chinese Restaurant,Smoke Shop,Golf Driving Range
10,75210,32.77103,Fried Chicken Joint,Music Venue,Snack Place,Thrift / Vintage Store,Park,Tram Station,Light Rail Station,Fast Food Restaurant,Opera House,Convenience Store
11,75246,32.791878,Mexican Restaurant,Fast Food Restaurant,Convenience Store,Sandwich Place,Discount Store,Dive Bar,Pizza Place,Taco Place,Bar,Smoke Shop
12,75216,32.710082,Fried Chicken Joint,Grocery Store,Mobile Phone Shop,Shoe Store,Discount Store,Rental Service,Convenience Store,Fast Food Restaurant,Pizza Place,Pharmacy
13,75238,32.873926,Discount Store,Convenience Store,Fast Food Restaurant,Breakfast Spot,Roller Rink,Bar,Mexican Restaurant,Burger Joint,Burmese Restaurant,Sandwich Place
15,75207,32.78643,Art Gallery,Furniture / Home Store,Antique Shop,Convenience Store,Fast Food Restaurant,Pizza Place,Italian Restaurant,Sandwich Place,Mexican Restaurant,Gym / Fitness Center
17,75287,32.998786,Bar,Fried Chicken Joint,Gym / Fitness Center,Health & Beauty Service,General Entertainment,Sports Bar,Boxing Gym,Dessert Shop,Supermarket,Fast Food Restaurant
18,75217,32.72238,Nightclub,Convenience Store,Home Service,Food,Light Rail Station,Fast Food Restaurant,Garden Center,Garden,Furniture / Home Store,Fried Chicken Joint
21,75244,32.922624,Furniture / Home Store,Gym / Fitness Center,Fast Food Restaurant,Electronics Store,Sandwich Place,Mexican Restaurant,Steakhouse,Bike Shop,Big Box Store,Outlet Store


In [26]:
#Cluster 2
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 1, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,75255,32.669783,Deli / Bodega,Home Service,Mexican Restaurant,Food Truck,Flower Shop,Fondue Restaurant,Food,Food Court,Food Service,French Restaurant


In [27]:
#Cluster 3
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 2, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,75252,32.998132,Mexican Restaurant,Sandwich Place,Nail Salon,Gym / Fitness Center,Mediterranean Restaurant,Bank,Convenience Store,Coffee Shop,Southern / Soul Food Restaurant,Burger Joint
2,75202,32.77988,Hotel,Mexican Restaurant,Coffee Shop,Steakhouse,Cocktail Bar,Plaza,Park,History Museum,American Restaurant,French Restaurant
3,75270,32.78133,Hotel,Coffee Shop,Mexican Restaurant,Plaza,Steakhouse,Park,Cocktail Bar,Bar,Gym,History Museum
6,75215,32.76103,Recreation Center,Pizza Place,Gym Pool,Steakhouse,Photography Studio,Restaurant,Coffee Shop,Fast Food Restaurant,Flower Shop,Fondue Restaurant
8,75251,32.919104,Hotel,Gym,Fast Food Restaurant,Pharmacy,Burger Joint,Coffee Shop,Park,Bank,Taco Place,Mexican Restaurant
9,75214,32.825628,Men's Store,Grocery Store,Bar,Shop & Service,Cosmetics Shop,Spa,Plaza,Moving Target,Fondue Restaurant,Food
14,75247,32.817978,American Restaurant,Hotel,Rental Car Location,Video Store,Coffee Shop,Bookstore,Strip Club,Restaurant,Paper / Office Supplies Store,Bakery
24,75204,32.800333,Coffee Shop,Convenience Store,Fast Food Restaurant,Mexican Restaurant,American Restaurant,Restaurant,Park,Pizza Place,Sports Bar,Pharmacy
25,75226,32.783978,Bar,Dive Bar,American Restaurant,Coffee Shop,Mexican Restaurant,Nightclub,Rock Club,Pizza Place,Brewery,Burger Joint
26,75205,32.836094,Clothing Store,Boutique,Golf Course,Athletics & Sports,Bank,Men's Store,Gym / Fitness Center,Gym,Grocery Store,Steakhouse


In [28]:
#Cluster 4
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 3, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
75,76126,32.649476,Bar,Garden Center,Zoo Exhibit,French Restaurant,Fondue Restaurant,Food,Food Court,Food Service,Food Truck,Fried Chicken Joint


In [29]:
#Cluster 5
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 4, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,75241,32.669383,Convenience Store,Park,Zoo Exhibit,French Restaurant,Fondue Restaurant,Food,Food Court,Food Service,Food Truck,Fried Chicken Joint


In [30]:
#Cluster 6
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 5, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,75245,32.922499,Park,Laundry Service,Zoo Exhibit,French Restaurant,Fondue Restaurant,Food,Food Court,Food Service,Food Truck,Fried Chicken Joint


In [31]:
#Cluster 7
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 6, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,75220,32.867977,Mexican Restaurant,Pizza Place,Grocery Store,Convenience Store,Business Service,Fast Food Restaurant,Bank,Chinese Restaurant,Seafood Restaurant,Mobile Phone Shop
16,75223,32.792879,Mexican Restaurant,Convenience Store,Fast Food Restaurant,Liquor Store,Vegetarian / Vegan Restaurant,Grocery Store,Tennis Court,Sandwich Place,Bakery,Fried Chicken Joint
19,75212,32.78238,Business Service,Dry Cleaner,Pizza Place,Thrift / Vintage Store,Locksmith,Mexican Restaurant,Convenience Store,Discount Store,Garden,Furniture / Home Store
31,75211,32.736931,Liquor Store,Mexican Restaurant,Convenience Store,Video Store,Bus Station,Snack Place,Food,Supermarket,Fast Food Restaurant,Taco Place
50,75233,32.704398,Storage Facility,Rental Service,Food,Taco Place,Mexican Restaurant,Food Truck,Flower Shop,Fondue Restaurant,Food Court,Food Service
51,75227,32.77003,Mexican Restaurant,Arts & Crafts Store,Bakery,BBQ Joint,Automotive Shop,Convenience Store,Park,Warehouse Store,Food Truck,Food
60,76110,32.706331,Mexican Restaurant,Pizza Place,Fast Food Restaurant,Thrift / Vintage Store,Grocery Store,Video Store,Basketball Court,Gym / Fitness Center,Salon / Barbershop,Dry Cleaner
63,76115,32.680333,Taco Place,Mexican Restaurant,Pharmacy,Tennis Court,Business Service,Bakery,Fast Food Restaurant,French Restaurant,Fried Chicken Joint,Furniture / Home Store
66,76153,32.667134,Fast Food Restaurant,Mexican Restaurant,Discount Store,Italian Restaurant,Pizza Place,Grocery Store,Park,Gas Station,Sandwich Place,Liquor Store
70,76105,32.724831,Mexican Restaurant,Seafood Restaurant,Discount Store,Flower Shop,Fried Chicken Joint,Grocery Store,Furniture / Home Store,Garden,French Restaurant,Farmers Market


In [32]:
#Cluster 8
dfw_merged_all.loc[dfw_merged_all['Cluster Labels'] == 7, dfw_merged_all.columns[[0] + [2] + list(range(5, dfw_merged_all.shape[1]))]]

Unnamed: 0,Zipcode,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
53,76179,32.876475,Theater,New American Restaurant,Lake,Café,Stadium,Music Venue,Gas Station,Garden Center,Garden,Fondue Restaurant
