In [1]:
import pandas as pd
import numpy as np

## Data Collection

In [2]:
# Wikipedia page that lists the North Carolina counties
url = 'https://en.wikipedia.org/wiki/List_of_counties_in_North_Carolina#cite_ref-6'

# Extract every HTML table on the page; the county table is the second one (index 1)
dfs = pd.read_html(url)

# Keep only the name, population and area columns. A new frame is built instead of
# dropping in place, so the scraped table in `dfs` stays intact and re-running the
# cell gives the same result.
df = dfs[1].drop(columns=['FIPS code[3]', 'Etymology[5]', 'County seat[4]', 'Est.[4]', 'Map', 'Origin[5]'])

# Dare County and Hyde County are excluded from the analysis.
# NOTE(review): the notebook does not record why these two are removed - TODO document.
df = df[~df['County'].isin(['Dare County', 'Hyde County'])]
df.head()

Unnamed: 0,County,Pop.(2017 est.)[6],Area[4][7]
0,Alamance County,157844,"435 sq mi(1,127 km2)"
1,Alexander County,37159,263 sq mi(681 km2)
2,Alleghany County,10935,236 sq mi(611 km2)
3,Anson County,25531,"537 sq mi(1,391 km2)"
4,Ashe County,26833,"427 sq mi(1,106 km2)"


## Cleaning Data and Adding Features

In [3]:
# Give the footnote-laden Wikipedia headers short names. The frame is rebound rather
# than renamed in place because `df` is a filtered view of the scraped table.
# Note: 'Area(km2)' still holds the raw text (e.g. '435 sq mi(1,127 km2)') at this
# point; it is converted to an integer further down.
df = df.rename(columns={'Pop.(2017 est.)[6]': 'Population', 'Area[4][7]': 'Area(km2)'})
df.head()

Unnamed: 0,County,Population,Area(km2)
0,Alamance County,157844,"435 sq mi(1,127 km2)"
1,Alexander County,37159,263 sq mi(681 km2)
2,Alleghany County,10935,236 sq mi(611 km2)
3,Anson County,25531,"537 sq mi(1,391 km2)"
4,Ashe County,26833,"427 sq mi(1,106 km2)"


In [4]:
# Pull the raw area strings out as an array so they can be parsed one by one below.
areas = df.loc[:, 'Area(km2)'].values
print(areas)

['435\xa0sq\xa0mi(1,127\xa0km2)' '263\xa0sq\xa0mi(681\xa0km2)'
 '236\xa0sq\xa0mi(611\xa0km2)' '537\xa0sq\xa0mi(1,391\xa0km2)'
 '427\xa0sq\xa0mi(1,106\xa0km2)' '247\xa0sq\xa0mi(640\xa0km2)'
 '959\xa0sq\xa0mi(2,484\xa0km2)' '741\xa0sq\xa0mi(1,919\xa0km2)'
 '887\xa0sq\xa0mi(2,297\xa0km2)' '860\xa0sq\xa0mi(2,227\xa0km2)'
 '660\xa0sq\xa0mi(1,709\xa0km2)' '515\xa0sq\xa0mi(1,334\xa0km2)'
 '365\xa0sq\xa0mi(945\xa0km2)' '474\xa0sq\xa0mi(1,228\xa0km2)'
 '306\xa0sq\xa0mi(793\xa0km2)' '1,341\xa0sq\xa0mi(3,473\xa0km2)'
 '428\xa0sq\xa0mi(1,109\xa0km2)' '414\xa0sq\xa0mi(1,072\xa0km2)'
 '709\xa0sq\xa0mi(1,836\xa0km2)' '497\xa0sq\xa0mi(1,287\xa0km2)'
 '233\xa0sq\xa0mi(603\xa0km2)' '221\xa0sq\xa0mi(572\xa0km2)'
 '469\xa0sq\xa0mi(1,215\xa0km2)' '954\xa0sq\xa0mi(2,471\xa0km2)'
 '774\xa0sq\xa0mi(2,005\xa0km2)' '658\xa0sq\xa0mi(1,704\xa0km2)'
 '526\xa0sq\xa0mi(1,362\xa0km2)' '567\xa0sq\xa0mi(1,469\xa0km2)'
 '267\xa0sq\xa0mi(692\xa0km2)' '819\xa0sq\xa0mi(2,121\xa0km2)'
 '298\xa0sq\xa0mi(772\xa0km2)' '507\xa0

In [5]:
import math
import re

# Each entry looks like '<n> sq mi(<m> km2)' (with non-breaking spaces); the number
# right after '(' is the area in square kilometres, possibly with thousands commas.
int_areas = []
radius = []
for area_text in areas:
    found = re.search(r'\(([\d,]+)', area_text)
    if found is None:
        raise ValueError('Cannot find a km2 value in {!r}'.format(area_text))
    area_km2 = int(found.group(1).replace(',', ''))
    int_areas.append(area_km2)
    # Radius, in metres, of the circle whose area equals the county area
    # (sqrt(A / pi) is in km, hence the factor 1000). Stored as an integer so the
    # column is numeric from the start instead of a list of formatted strings.
    radius.append(round(math.sqrt(area_km2 / math.pi) * 1000))

print(int_areas)
print(radius)

[1127, 681, 611, 1391, 1106, 640, 2484, 1919, 2297, 2227, 1709, 1334, 945, 1228, 793, 3473, 1109, 1072, 1836, 1287, 603, 572, 1215, 2471, 2005, 1704, 1362, 1469, 692, 2121, 772, 1313, 1070, 1282, 943, 896, 782, 1391, 689, 1704, 1893, 1557, 1437, 971, 932, 1015, 1546, 1279, 2062, 1225, 671, 1041, 795, 1155, 1344, 1171, 1194, 1414, 575, 1300, 1829, 1406, 850, 1427, 2354, 1039, 1466, 749, 2416, 852, 1046, 1696, 619, 2046, 1243, 2463, 1481, 1357, 1466, 2453, 831, 1046, 1181, 1393, 1401, 987, 1554, 1658, 699, 2220, 1150, 1098, 811, 1443, 1968, 969, 873, 811]
['18940', '14723', '13946', '21042', '18763', '14273', '28119', '24715', '27040', '26625', '23324', '20606', '17344', '19771', '15888', '33249', '18788', '18472', '24175', '20240', '13854', '13493', '19666', '28045', '25263', '23289', '20822', '21624', '14842', '25983', '15676', '20444', '18455', '20201', '17325', '16888', '15777', '21042', '14809', '23289', '24547', '22262', '21387', '17581', '17224', '17975', '22183', '20177', '25619'

In [6]:
# Replace the raw area text with the parsed integers and add the search radius.
# `assign` returns a new frame, which avoids writing into a filtered slice.
df = df.assign(**{'Area(km2)': int_areas, 'Radius(m)': radius})
df.head()

Unnamed: 0,County,Population,Area(km2),Radius(m)
0,Alamance County,157844,1127,18940
1,Alexander County,37159,681,14723
2,Alleghany County,10935,611,13946
3,Anson County,25531,1391,21042
4,Ashe County,26833,1106,18763


In [7]:
# Row/column count of the county table after the cleaning steps.
df.shape

(98, 4)

In [8]:
from geopy.geocoders import Nominatim
import folium
import requests

In [9]:
from geopy.extra.rate_limiter import RateLimiter

# Look up one coordinate pair per county with Nominatim.
counties = df['County'].values

# Build the geocoder once and throttle it to one request per second; the loop
# issues one lookup per county.
geolocator = Nominatim(user_agent="nc_explorer")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

latitude = []
longitude = []
for county in counties:
    address = '{}, NC'.format(county)
    location = geocode(address)
    # geocode() yields None when nothing matches; fail with the offending address
    # instead of an AttributeError on `None.latitude`.
    if location is None:
        raise ValueError('Nominatim returned no result for {!r}'.format(address))
    latitude.append(location.latitude)
    longitude.append(location.longitude)
print(latitude,longitude)

[36.0472941, 35.924738, 36.5029951, 34.9705848, 36.4393554, 36.0771285, 35.469331, 36.0608505, 34.627971, 34.0718195, 35.6292222, 35.7270403, 35.4045503, 35.9417876, 36.3106247, 34.8496625, 36.4035101, 35.6740563, 35.7151316, 35.1184149, 36.0768949, 35.0572652, 35.330702, 34.2814497, 35.0808574, 35.0461723, 36.3918723, 35.7902384, 35.9375154, 34.9464988, 36.0181316, 35.9213841, 36.1476399, 36.1028596, 35.2849659, 36.4412509, 35.359542, 36.3210127, 35.481724, 36.0875688, 36.2494056, 35.388093, 35.5456202, 35.3389062, 36.3560545, 35.0271856, 35.8101636, 35.2962391, 35.5285816, 35.0306301, 35.4691746, 35.2590371, 35.4866424, 35.6608869, 35.1436639, 35.8482034, 35.8134443, 35.2356385, 36.0001181, 35.3299572, 35.3054614, 35.992983, 34.2751052, 36.4168078, 34.7952348, 36.0605095, 35.1238031, 36.2280793, 34.5294394, 36.1739896, 36.3967816, 35.611481, 35.2779217, 35.7142874, 35.0288383, 34.6485079, 36.3926798, 35.6315952, 35.3994626, 35.0078434, 34.8628902, 35.3235477, 36.4120995, 36.4135582, 

In [10]:
# Attach the geocoded coordinates, one pair per county row.
# NOTE: `latitude` / `longitude` are rebound to single floats by the map-centre cell
# further down, so this cell must run before that one.
df = df.assign(Latitude=latitude, Longitude=longitude)

In [12]:
# Make sure the search radius is stored as a 64-bit integer, then re-check the shape.
df = df.astype(dtype={'Radius(m)': 'int64'})
df.shape

(98, 6)

In [113]:
# Preview the finished county table (name, population, area, radius, coordinates).
df.head()

Unnamed: 0,County,Population,Area(km2),Radius(m),Latitude,Longitude
0,Alamance County,157844,1127,18940,36.047294,-79.400461
1,Alexander County,37159,681,14723,35.924738,-81.171632
2,Alleghany County,10935,611,13946,36.502995,-81.134084
3,Anson County,25531,1391,21042,34.970585,-80.095819
4,Ashe County,26833,1106,18763,36.439355,-81.507657


## Initial Visualization

In [13]:
# Centre the base map on the point Nominatim returns for the query 'NC'.
geolocator = Nominatim(user_agent="nc_explorer")
address = 'NC'
location = geolocator.geocode(address)

# NOTE: from here on `latitude` / `longitude` hold the map centre (two floats), no
# longer the per-county lists; the cluster maps below reuse them as the centre.
latitude, longitude = location.latitude, location.longitude
map_nc = folium.Map(location=[latitude, longitude], zoom_start=7)

In [14]:
# Draw one fixed-size blue marker per county; its popup reads "<county>, NC".
# (The stray `parse_html=False` previously passed to CircleMarker was dropped:
# parse_html is an argument of folium.Popup, which already receives it here.)
for lat, lng, county in zip(df['Latitude'], df['Longitude'], df['County']):
    label = folium.Popup('{}, NC'.format(county), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_nc)

In [15]:
# Render the base map with the county markers.
map_nc

## Define all the algorithms needed to cluster similar counties

In [25]:
#Uses the foursquare api to find venues in a radius around a county
def getNearbyVenues(names, latitudes, longitudes, radius):
    """Query Foursquare's ``venues/explore`` endpoint once per county.

    Parameters
    ----------
    names, latitudes, longitudes, radius : iterables
        Parallel sequences (iterated together with ``zip``): the county name, the
        coordinates sent as the ``ll`` parameter and the value sent as ``radius``.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'County', 'County Latitude',
        'County Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. The frame has these columns even when no venue was found.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``limit``. Prints the number of venues found for every county. A county whose
    request fails, or whose response lacks the expected structure, is reported
    and contributes no rows; it never reuses the previous county's results.
    """
    columns = ['County',
               'County Latitude',
               'County Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng, rd in zip(names, latitudes, longitudes, radius):
        # Let requests build the query string instead of formatting it by hand
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': rd,
            'limit': limit,
        }

        # Single GET per county; any failure resets `results` so stale data from
        # an earlier iteration can never leak into this county.
        payload = None
        try:
            payload = requests.get(endpoint, params=params, timeout=30).json()
            results = payload["response"]["groups"][0]['items']
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
            meta = payload.get('meta') if isinstance(payload, dict) else None
            print('Skipping {}: {!r} (meta: {})'.format(name, err, meta))
            results = []

        print(len(results))

        # Keep only the relevant fields of every venue
        for v in results:
            venue = v['venue']
            # Guard against a venue that carries an empty category list
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing `columns=` keeps the schema intact even when `rows` is empty
    return pd.DataFrame(rows, columns=columns)

In [139]:
# Rank the venue categories of a single county row
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped (``prep_cluster`` passes rows whose
    first entry is the county name); the remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues`` are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

# English ordinal for a rank, used to build the "Nth Most Common Venue" headers
def _ordinal(n):
    """Return ``n`` followed by its ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


#Takes the Foursquare API output and puts the data into dataframes so that it can be clustered and easily seen in a table
def prep_cluster(df,num_top_venues):
    """Fetch venues for every county and build the clustering inputs.

    Parameters
    ----------
    df : pandas.DataFrame
        County table; the columns 'County', 'Latitude', 'Longitude' and
        'Radius(m)' are read and passed to ``getNearbyVenues``.
    num_top_venues : int
        How many "Nth Most Common Venue" columns to produce.

    Returns
    -------
    (df_clustering, df_venues_sorted) : tuple of pandas.DataFrame
        ``df_clustering`` has one row per county that returned venues and one
        column per venue category, holding the share of that county's venues in
        the category. ``df_venues_sorted`` has the columns 'County', the ranked
        venue columns and an empty-string 'Cluster Labels' column. Both frames
        share the same row order.
    """
    df_venues = getNearbyVenues(names=df['County'],latitudes=df['Latitude'],longitudes=df['Longitude'],radius = df['Radius(m)'])

    # One-hot encode the category of every venue
    df_onehot = pd.get_dummies(df_venues[['Venue Category']], prefix="", prefix_sep="")

    # add the county column back and move it to the front
    df_onehot['County'] = df_venues['County']
    fixed_columns = [df_onehot.columns[-1]] + list(df_onehot.columns[:-1])
    df_onehot = df_onehot[fixed_columns]

    # The mean of the one-hot columns is the per-county frequency of each category
    df_grouped = df_onehot.groupby('County').mean().reset_index()

    # Column headers: County, 1st Most Common Venue, 2nd Most Common Venue, ...
    columns = ['County'] + ['{} Most Common Venue'.format(_ordinal(rank))
                            for rank in range(1, num_top_venues + 1)]

    # Rank the categories of every county and build the table in one go instead of
    # filling an empty frame row by row
    top_rows = [return_most_common_venues(df_grouped.iloc[pos, :], num_top_venues)
                for pos in range(df_grouped.shape[0])]
    df_venues_sorted = pd.DataFrame(top_rows, columns=columns[1:])
    df_venues_sorted.insert(0, 'County', df_grouped['County'])
    df_venues_sorted['Cluster Labels'] = ""

    # `columns=` keyword: the positional axis argument of drop() no longer exists
    # in pandas 2.x
    df_clustering = df_grouped.drop(columns='County')
    return df_clustering,df_venues_sorted
# K-means variant: cluster the counties and return the county table with labels attached
def clusterk(df, df_clustering, kclusters, df_venues_sorted):
    """Cluster with k-means and join the labels onto the county table.

    Fits ``KMeans(n_clusters=kclusters, random_state=0)`` on ``df_clustering``,
    writes the resulting labels into ``df_venues_sorted['Cluster Labels']``
    (the caller's frame is modified) and returns ``df`` joined with
    ``df_venues_sorted`` on the 'County' column. ``df`` itself is not modified.
    """
    estimator = KMeans(n_clusters=kclusters, random_state=0)
    estimator.fit(df_clustering)
    df_venues_sorted['Cluster Labels'] = estimator.labels_

    # Attach the ranked venues and the cluster label to every county row
    venues_by_county = df_venues_sorted.set_index('County')
    return df.join(venues_by_county, on='County')
def clustera(df, df_clustering, kclusters, df_venues_sorted):
    """Cluster with complete-linkage agglomerative clustering and join the labels.

    Fits ``AgglomerativeClustering(n_clusters=kclusters, linkage='complete')`` on
    ``df_clustering``, writes the labels into ``df_venues_sorted['Cluster Labels']``
    (the caller's frame is modified) and returns ``df`` joined with
    ``df_venues_sorted`` on the 'County' column. ``df`` itself is not modified.
    """
    estimator = AgglomerativeClustering(n_clusters=kclusters, linkage='complete')
    estimator.fit(df_clustering)
    df_venues_sorted['Cluster Labels'] = estimator.labels_

    # Attach the ranked venues and the cluster label to every county row
    venues_by_county = df_venues_sorted.set_index('County')
    return df.join(venues_by_county, on='County')

def clusterd(df,df_clustering,samples,epsilon,df_venues_sorted):
    """Cluster with DBSCAN and join the labels onto the county table.

    Fits ``DBSCAN(eps=epsilon, min_samples=samples)`` on ``df_clustering``, writes
    the labels into ``df_venues_sorted['Cluster Labels']`` (the caller's frame is
    modified), prints how many distinct labels were produced and returns ``df``
    joined with ``df_venues_sorted`` on the 'County' column. ``df`` itself is not
    modified.
    """
    db = DBSCAN(eps=epsilon, min_samples=samples).fit(df_clustering)
    labels = db.labels_
    df_venues_sorted['Cluster Labels'] = labels

    # Number of distinct labels; this count includes the -1 label when present
    print(len(set(labels)))

    # Attach the ranked venues and the cluster label to every county row
    return df.join(df_venues_sorted.set_index('County'), on='County')

In [27]:
import os
import warnings

# Foursquare credentials come from the environment so that no secret is stored in
# the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel.
# NOTE(review): the key pair that used to be hard-coded in this cell was published
# with the notebook and should be revoked/rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
# Requested venues per county. The per-county counts printed by the venue download
# never exceed 100, so the API presumably caps this - TODO confirm.
limit = 1000

if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn('FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
                  'Foursquare requests will be rejected.')

In [28]:
# Download the venues for every county and keep the ten most common categories per
# county. One line with the number of venues found is printed per county.
df_clustering, df_venues = prep_cluster(df, num_top_venues=10)

100
31
13
35
38
93
60
32
30
100
100
49
100
77
69
100
19
100
61
50
28
53
100
43
61
100
94
100
59
67
100
84
100
57
100
6
29
80
18
100
56
53
89
100
31
47
41
74
100
13
82
66
60
64
71
49
38
100
25
28
70
98
100
3
98
100
31
76
61
12
53
100
41
87
62
87
100
100
71
56
52
44
22
73
100
46
9
58
51
100
30
19
100
97
70
87
41
29


In [29]:
# Per-county share of venues in each category (one column per category).
df_clustering.head()

Unnamed: 0,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Terminal,American Restaurant,Animal Shelter,Antique Shop,Apres Ski Bar,Aquarium,...,Waterfall,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,0.0,0.01,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.096774,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
# Ranked venue categories per county; 'Cluster Labels' is still empty at this point.
df_venues.head()

Unnamed: 0,County,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,Alamance County,American Restaurant,Mexican Restaurant,Coffee Shop,Pizza Place,Breakfast Spot,Sandwich Place,Italian Restaurant,BBQ Joint,Donut Shop,Japanese Restaurant,
1,Alexander County,Discount Store,Fast Food Restaurant,Gas Station,American Restaurant,Park,Pizza Place,Farmers Market,Electronics Store,Deli / Bodega,Big Box Store,
2,Alleghany County,Discount Store,Grocery Store,Sandwich Place,Scenic Lookout,Park,Fast Food Restaurant,Pizza Place,Food,Supermarket,Food & Drink Shop,
3,Anson County,Fast Food Restaurant,Discount Store,Gas Station,Sandwich Place,Ice Cream Shop,Mexican Restaurant,Big Box Store,Breakfast Spot,Fried Chicken Joint,Burger Joint,
4,Ashe County,Fast Food Restaurant,Discount Store,Grocery Store,Gas Station,Pizza Place,American Restaurant,Pharmacy,Mexican Restaurant,Big Box Store,Food,


## Create and Visualize Clusters

In [131]:
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN

In [168]:
# k-means with 8 clusters and complete-linkage agglomerative clustering with 7.
# Each call overwrites df_venues['Cluster Labels'], but the returned frames keep
# their own copy of the labels.
df_merged = clusterk(df,df_clustering,8,df_venues)
df_merged2 = clustera(df,df_clustering,7,df_venues)

# DBSCAN run (min_samples=2, eps=0.3). The third cluster map below reads
# `df_merged3`, so it has to be created for the notebook to run top to bottom.
# A copy of df_venues is passed so that df_venues keeps the agglomerative labels,
# exactly as it did while this line was commented out.
df_merged3 = clusterd(df,df_clustering,2,.3,df_venues.copy())

In [112]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# Map of the k-means result; `kclusters` must match the value passed to clusterk()
kclusters = 8
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=7)

# one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map; counties without a cluster label (no venues were found
# for them, so the join left NaN) are skipped instead of breaking the colour lookup
marker_columns = ['Latitude', 'Longitude', 'County', 'Cluster Labels']
for lat, lon, poi, cluster in df_merged.dropna(subset=['Cluster Labels'])[marker_columns].itertuples(index=False, name=None):
    cluster = int(cluster)
    # Same colour assignment as the former rainbow[cluster-1] (label 0 takes the
    # last colour), written with an explicit modulo so it can never go out of range
    marker_color = rainbow[(cluster - 1) % kclusters]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [169]:
# Map of the agglomerative result; `kclusters` must match the value passed to clustera()
kclusters = 7
map_clusters2 = folium.Map(location=[latitude, longitude], zoom_start=7)

# one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map; counties without a cluster label (no venues were found
# for them, so the join left NaN) are skipped instead of breaking the colour lookup
marker_columns = ['Latitude', 'Longitude', 'County', 'Cluster Labels']
for lat, lon, poi, cluster in df_merged2.dropna(subset=['Cluster Labels'])[marker_columns].itertuples(index=False, name=None):
    cluster = int(cluster)
    # Same colour assignment as the former rainbow[cluster-1] (label 0 takes the
    # last colour), written with an explicit modulo so it can never go out of range
    marker_color = rainbow[(cluster - 1) % kclusters]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters2)

map_clusters2

In [137]:
# Map of the DBSCAN result (`df_merged3`, produced by clusterd()).
map_clusters3 = folium.Map(location=[latitude, longitude], zoom_start=7)

# DBSCAN decides the number of labels itself, so the palette is sized from the
# labels actually present instead of a hard-coded count.
marker_columns = ['Latitude', 'Longitude', 'County', 'Cluster Labels']
labelled3 = df_merged3.dropna(subset=['Cluster Labels'])[marker_columns]
cluster_ids = sorted(set(labelled3['Cluster Labels'].astype(int)))
kclusters = len(cluster_ids)

# one evenly spaced rainbow colour per label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]
color_of = dict(zip(cluster_ids, rainbow))

# add markers to the map
for lat, lon, poi, cluster in labelled3.itertuples(index=False, name=None):
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=color_of[cluster],
        fill=True,
        fill_color=color_of[cluster],
        fill_opacity=0.7).add_to(map_clusters3)

map_clusters3

## Examine Clusters

In [103]:
# k-means cluster 0: keep columns 0-1 (County, Population) and everything from
# column 6 on (venue ranking and cluster label)
df_merged[df_merged['Cluster Labels'] == 0].iloc[:, [0, 1, *range(6, df_merged.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
1,Alexander County,37159,Discount Store,Fast Food Restaurant,Gas Station,American Restaurant,Park,Pizza Place,Farmers Market,Electronics Store,Deli / Bodega,Big Box Store,0
3,Anson County,25531,Fast Food Restaurant,Discount Store,Gas Station,Sandwich Place,Ice Cream Shop,Mexican Restaurant,Big Box Store,Breakfast Spot,Fried Chicken Joint,Burger Joint,0
4,Ashe County,26833,Fast Food Restaurant,Discount Store,Grocery Store,Gas Station,Pizza Place,American Restaurant,Pharmacy,Mexican Restaurant,Big Box Store,Food,0
7,Bertie County,19913,Fast Food Restaurant,Discount Store,Pizza Place,Supermarket,Pharmacy,Department Store,Sandwich Place,Mexican Restaurant,Bank,BBQ Joint,0
8,Bladen County,34130,Fast Food Restaurant,Sandwich Place,Discount Store,Pizza Place,Gas Station,Burger Joint,Pharmacy,Grocery Store,Clothing Store,Big Box Store,0
16,Caswell County,22833,Discount Store,Fast Food Restaurant,Supermarket,Gas Station,Tourist Information Center,Park,Campground,Zoo,Italian Restaurant,Sports Club,0
30,Duplin County,59350,Fast Food Restaurant,Discount Store,Sandwich Place,Pizza Place,Pharmacy,Golf Course,Fried Chicken Joint,Supermarket,Gas Station,Grocery Store,0
39,Greene County,21059,Gas Station,Discount Store,Fast Food Restaurant,Supermarket,Italian Restaurant,Pharmacy,Pizza Place,Sandwich Place,Mexican Restaurant,American Restaurant,0
45,Hertford County,24262,Fast Food Restaurant,Pharmacy,Discount Store,Supermarket,Sandwich Place,American Restaurant,Pizza Place,Bank,Café,Coffee Shop,0
46,Hoke County,52571,Fast Food Restaurant,Discount Store,Breakfast Spot,Pizza Place,Supermarket,Sandwich Place,Mexican Restaurant,BBQ Joint,Coffee Shop,Ice Cream Shop,0


In [104]:
# k-means cluster 1: keep columns 0-1 (County, Population) and everything from
# column 6 on (venue ranking and cluster label)
df_merged[df_merged['Cluster Labels'] == 1].iloc[:, [0, 1, *range(6, df_merged.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
11,Burke County,88898,American Restaurant,Fast Food Restaurant,State / Provincial Park,BBQ Joint,Brewery,Pizza Place,Grocery Store,Furniture / Home Store,Gas Station,Park,1
17,Catawba County,156182,American Restaurant,Grocery Store,Fast Food Restaurant,Mexican Restaurant,Pizza Place,Ice Cream Shop,Steakhouse,Discount Store,Sandwich Place,Hardware Store,1
21,Clay County,10753,American Restaurant,Fast Food Restaurant,Discount Store,Mexican Restaurant,Pizza Place,BBQ Joint,Pharmacy,Park,Gas Station,Supermarket,1
22,Cleveland County,97038,Discount Store,American Restaurant,Fast Food Restaurant,Pizza Place,Coffee Shop,Sandwich Place,Mexican Restaurant,Pharmacy,Breakfast Spot,Grocery Store,1
28,Davidson County,164118,BBQ Joint,Fast Food Restaurant,American Restaurant,Discount Store,Supermarket,Mexican Restaurant,Breakfast Spot,Sandwich Place,Pharmacy,Italian Restaurant,1
37,Graham County,8607,American Restaurant,Trail,Hotel,Fast Food Restaurant,Gas Station,Convenience Store,Resort,Discount Store,Road,Pharmacy,1
43,Haywood County,59854,Fast Food Restaurant,American Restaurant,Coffee Shop,Discount Store,Breakfast Spot,Supermarket,Pizza Place,Scenic Lookout,Mexican Restaurant,Ice Cream Shop,1
50,Johnston County,186308,Fast Food Restaurant,American Restaurant,Coffee Shop,Supermarket,Discount Store,BBQ Joint,Mexican Restaurant,Trail,Convenience Store,Burger Joint,1
54,Lincoln County,80504,Discount Store,American Restaurant,Fast Food Restaurant,Grocery Store,Sandwich Place,Restaurant,Coffee Shop,Italian Restaurant,Supermarket,Mexican Restaurant,1
56,Macon County,21347,American Restaurant,Hotel,Coffee Shop,Trail,Mexican Restaurant,Discount Store,BBQ Joint,Grocery Store,Restaurant,Sandwich Place,1


In [105]:
# k-means cluster 2: keep columns 0-1 (County, Population) and everything from
# column 6 on (venue ranking and cluster label)
df_merged[df_merged['Cluster Labels'] == 2].iloc[:, [0, 1, *range(6, df_merged.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
6,Beaufort County,47316,Discount Store,Sandwich Place,American Restaurant,Italian Restaurant,Fast Food Restaurant,Supermarket,Hot Dog Joint,Pharmacy,Burger Joint,BBQ Joint,2
14,Camden County,10336,Fast Food Restaurant,Discount Store,Pizza Place,Convenience Store,Seafood Restaurant,Bakery,American Restaurant,Hotel,Mexican Restaurant,Supermarket,2
18,Chatham County,68364,Pizza Place,Fast Food Restaurant,American Restaurant,Mexican Restaurant,Golf Course,Pharmacy,Grocery Store,Park,BBQ Joint,Seafood Restaurant,2
20,Chowan County,14370,Fast Food Restaurant,American Restaurant,Pharmacy,Hotel,Movie Theater,New American Restaurant,Food,Café,Outdoors & Recreation,Coffee Shop,2
29,Davie County,41766,Fast Food Restaurant,Discount Store,Convenience Store,Pizza Place,Mexican Restaurant,Sandwich Place,Gas Station,Golf Course,Supermarket,Italian Restaurant,2
32,Edgecombe County,54032,Fast Food Restaurant,Gas Station,BBQ Joint,Discount Store,Sandwich Place,Mexican Restaurant,Supermarket,Grocery Store,Italian Restaurant,Burger Joint,2
38,Granville County,58503,Fast Food Restaurant,Discount Store,Pizza Place,Pharmacy,American Restaurant,Supermarket,Italian Restaurant,Gas Station,Sandwich Place,Convenience Store,2
41,Halifax County,52300,Discount Store,Sandwich Place,Fast Food Restaurant,Gas Station,Pharmacy,Fried Chicken Joint,Supermarket,Hotel,American Restaurant,Ice Cream Shop,2
42,Harnett County,128753,Fast Food Restaurant,Sandwich Place,Mexican Restaurant,Gas Station,Fried Chicken Joint,Supermarket,Discount Store,Pharmacy,American Restaurant,Golf Course,2
52,Lee County,59805,Sandwich Place,American Restaurant,BBQ Joint,Supermarket,Pharmacy,Fast Food Restaurant,Mexican Restaurant,Steakhouse,Pizza Place,Discount Store,2


In [106]:
# k-means cluster 3: keep columns 0-1 (County, Population) and everything from
# column 6 on (venue ranking and cluster label)
df_merged[df_merged['Cluster Labels'] == 3].iloc[:, [0, 1, *range(6, df_merged.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
36,Gates County,11601,Discount Store,BBQ Joint,Diner,Juice Bar,Stables,Food & Drink Shop,Financial or Legal Service,Fish Market,Fishing Spot,Flea Market,3


In [107]:
# k-means cluster 4: keep columns 0-1 (County, Population) and everything from
# column 6 on (venue ranking and cluster label)
df_merged[df_merged['Cluster Labels'] == 4].iloc[:, [0, 1, *range(6, df_merged.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,Alamance County,157844,American Restaurant,Mexican Restaurant,Coffee Shop,Pizza Place,Breakfast Spot,Sandwich Place,Italian Restaurant,BBQ Joint,Donut Shop,Japanese Restaurant,4
5,Avery County,17535,BBQ Joint,American Restaurant,Ski Area,Hotel,Pharmacy,Restaurant,Tourist Information Center,Trail,Mexican Restaurant,Italian Restaurant,4
9,Brunswick County,122586,Seafood Restaurant,Beach,American Restaurant,Ice Cream Shop,Mexican Restaurant,Golf Course,Sandwich Place,Italian Restaurant,Supermarket,Burger Joint,4
10,Buncombe County,252268,Brewery,Pizza Place,Bar,Grocery Store,Breakfast Spot,Bakery,BBQ Joint,Coffee Shop,American Restaurant,Park,4
12,Cabarrus County,196716,Fast Food Restaurant,Convenience Store,American Restaurant,Park,Coffee Shop,Mexican Restaurant,Sandwich Place,Pizza Place,BBQ Joint,Burger Joint,4
15,Carteret County,68699,Seafood Restaurant,Beach,Pizza Place,Mexican Restaurant,American Restaurant,Grocery Store,Bar,Fast Food Restaurant,Supermarket,Coffee Shop,4
24,Craven County,103374,Grocery Store,American Restaurant,Pizza Place,Mexican Restaurant,Bar,Fast Food Restaurant,Restaurant,Golf Course,BBQ Joint,Sandwich Place,4
25,Cumberland County,332766,Japanese Restaurant,Burger Joint,Sandwich Place,Bakery,Fast Food Restaurant,American Restaurant,Grocery Store,Coffee Shop,Café,Warehouse Store,4
26,Currituck County,25247,Beach,Farmers Market,American Restaurant,Gas Station,Convenience Store,Gift Shop,Pizza Place,Fast Food Restaurant,Resort,Restaurant,4
31,Durham County,300865,Coffee Shop,Fast Food Restaurant,Mexican Restaurant,Pizza Place,Brewery,Grocery Store,Sandwich Place,Burger Joint,Breakfast Spot,Supermarket,4


In [108]:
# k-means cluster 5: keep columns 0-1 (County, Population) and everything from
# column 6 on (venue ranking and cluster label)
df_merged[df_merged['Cluster Labels'] == 5].iloc[:, [0, 1, *range(6, df_merged.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
2,Alleghany County,10935,Discount Store,Grocery Store,Sandwich Place,Scenic Lookout,Park,Fast Food Restaurant,Pizza Place,Food,Supermarket,Food & Drink Shop,5
13,Caldwell County,81805,Discount Store,Fast Food Restaurant,Pizza Place,Mexican Restaurant,Sandwich Place,Grocery Store,Pharmacy,Supermarket,Gas Station,Fried Chicken Joint,5
19,Cherokee County,27463,Discount Store,Fast Food Restaurant,Coffee Shop,Pizza Place,Sandwich Place,Bar,Big Box Store,Other Great Outdoors,American Restaurant,Steakhouse,5
23,Columbus County,56589,Discount Store,Sandwich Place,Fast Food Restaurant,Gas Station,Seafood Restaurant,Supermarket,Restaurant,Pharmacy,Department Store,Fried Chicken Joint,5
34,Franklin County,63866,Discount Store,Fast Food Restaurant,American Restaurant,Supermarket,Sandwich Place,Convenience Store,Pharmacy,Fried Chicken Joint,Breakfast Spot,Gas Station,5
48,Iredell County,169798,Gas Station,Grocery Store,Discount Store,BBQ Joint,Park,Mexican Restaurant,Hardware Store,Soccer Field,Fast Food Restaurant,Sandwich Place,5
51,Jones County,9776,Discount Store,Kids Store,Sandwich Place,Gas Station,Fast Food Restaurant,Golf Course,Business Service,Grocery Store,Scenic Lookout,Food Court,5
55,McDowell County,34160,Discount Store,Mexican Restaurant,Sandwich Place,Fast Food Restaurant,Pizza Place,Rest Area,Grocery Store,Gym,Breakfast Spot,Gas Station,5
60,Mitchell County,15155,Discount Store,Grocery Store,Pizza Place,Vacation Rental,Burger Joint,Mountain,Campground,Mexican Restaurant,Mediterranean Restaurant,Fast Food Restaurant,5
80,Rutherford County,66523,Discount Store,Fast Food Restaurant,Sandwich Place,American Restaurant,Pharmacy,Convenience Store,Intersection,Japanese Restaurant,Pizza Place,Breakfast Spot,5


In [109]:
# k-means cluster 6: keep columns 0-1 (County, Population) and everything from
# column 6 on (venue ranking and cluster label)
df_merged[df_merged['Cluster Labels'] == 6].iloc[:, [0, 1, *range(6, df_merged.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
65,Northampton County,20426,Grocery Store,Discount Store,American Restaurant,Zoo Exhibit,Food & Drink Shop,Financial or Legal Service,Fish Market,Fishing Spot,Flea Market,Flower Shop,6


In [119]:
# k-means cluster 7: keep columns 0-1 (County, Population) and everything from
# column 6 on (venue ranking and cluster label)
df_merged[df_merged['Cluster Labels'] == 7].iloc[:, [0, 1, *range(6, df_merged.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
88,Tyrrell County,4090,Tourist Information Center,Rental Service,Grocery Store,Supermarket,Discount Store,Other Great Outdoors,Coffee Shop,Burger Joint,Harbor / Marina,Flea Market,7


In [170]:
# Agglomerative cluster 0: keep columns 0-1 (County, Population) and everything
# from column 6 on (venue ranking and cluster label)
df_merged2[df_merged2['Cluster Labels'] == 0].iloc[:, [0, 1, *range(6, df_merged2.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,Alamance County,157844,American Restaurant,Mexican Restaurant,Coffee Shop,Pizza Place,Breakfast Spot,Sandwich Place,Italian Restaurant,BBQ Joint,Donut Shop,Japanese Restaurant,0
5,Avery County,17535,BBQ Joint,American Restaurant,Ski Area,Hotel,Pharmacy,Restaurant,Tourist Information Center,Trail,Mexican Restaurant,Italian Restaurant,0
9,Brunswick County,122586,Seafood Restaurant,Beach,American Restaurant,Ice Cream Shop,Mexican Restaurant,Golf Course,Sandwich Place,Italian Restaurant,Supermarket,Burger Joint,0
10,Buncombe County,252268,Brewery,Pizza Place,Bar,Grocery Store,Breakfast Spot,Bakery,BBQ Joint,Coffee Shop,American Restaurant,Park,0
12,Cabarrus County,196716,Fast Food Restaurant,Convenience Store,American Restaurant,Park,Coffee Shop,Mexican Restaurant,Sandwich Place,Pizza Place,BBQ Joint,Burger Joint,0
15,Carteret County,68699,Seafood Restaurant,Beach,Pizza Place,Mexican Restaurant,American Restaurant,Grocery Store,Bar,Fast Food Restaurant,Supermarket,Coffee Shop,0
24,Craven County,103374,Grocery Store,American Restaurant,Pizza Place,Mexican Restaurant,Bar,Fast Food Restaurant,Restaurant,Golf Course,BBQ Joint,Sandwich Place,0
25,Cumberland County,332766,Japanese Restaurant,Burger Joint,Sandwich Place,Bakery,Fast Food Restaurant,American Restaurant,Grocery Store,Coffee Shop,Café,Warehouse Store,0
26,Currituck County,25247,Beach,Farmers Market,American Restaurant,Gas Station,Convenience Store,Gift Shop,Pizza Place,Fast Food Restaurant,Resort,Restaurant,0
31,Durham County,300865,Coffee Shop,Fast Food Restaurant,Mexican Restaurant,Pizza Place,Brewery,Grocery Store,Sandwich Place,Burger Joint,Breakfast Spot,Supermarket,0


In [171]:
# Agglomerative cluster 1: keep columns 0-1 (County, Population) and everything
# from column 6 on (venue ranking and cluster label)
df_merged2[df_merged2['Cluster Labels'] == 1].iloc[:, [0, 1, *range(6, df_merged2.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
2,Alleghany County,10935,Discount Store,Grocery Store,Sandwich Place,Scenic Lookout,Park,Fast Food Restaurant,Pizza Place,Food,Supermarket,Food & Drink Shop,1
51,Jones County,9776,Discount Store,Kids Store,Sandwich Place,Gas Station,Fast Food Restaurant,Golf Course,Business Service,Grocery Store,Scenic Lookout,Food Court,1


In [177]:
# Agglomerative cluster 2: keep columns 0-1 (County, Population) and everything
# from column 6 on (venue ranking and cluster label)
df_merged2[df_merged2['Cluster Labels'] == 2].iloc[:, [0, 1, *range(6, df_merged2.shape[1])]]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
4,Ashe County,26833,Fast Food Restaurant,Discount Store,Grocery Store,Gas Station,Pizza Place,American Restaurant,Pharmacy,Mexican Restaurant,Big Box Store,Food,2
6,Beaufort County,47316,Discount Store,Sandwich Place,American Restaurant,Italian Restaurant,Fast Food Restaurant,Supermarket,Hot Dog Joint,Pharmacy,Burger Joint,BBQ Joint,2
11,Burke County,88898,American Restaurant,Fast Food Restaurant,State / Provincial Park,BBQ Joint,Brewery,Pizza Place,Grocery Store,Furniture / Home Store,Gas Station,Park,2
13,Caldwell County,81805,Discount Store,Fast Food Restaurant,Pizza Place,Mexican Restaurant,Sandwich Place,Grocery Store,Pharmacy,Supermarket,Gas Station,Fried Chicken Joint,2
14,Camden County,10336,Fast Food Restaurant,Discount Store,Pizza Place,Convenience Store,Seafood Restaurant,Bakery,American Restaurant,Hotel,Mexican Restaurant,Supermarket,2
17,Catawba County,156182,American Restaurant,Grocery Store,Fast Food Restaurant,Mexican Restaurant,Pizza Place,Ice Cream Shop,Steakhouse,Discount Store,Sandwich Place,Hardware Store,2
18,Chatham County,68364,Pizza Place,Fast Food Restaurant,American Restaurant,Mexican Restaurant,Golf Course,Pharmacy,Grocery Store,Park,BBQ Joint,Seafood Restaurant,2
19,Cherokee County,27463,Discount Store,Fast Food Restaurant,Coffee Shop,Pizza Place,Sandwich Place,Bar,Big Box Store,Other Great Outdoors,American Restaurant,Steakhouse,2
20,Chowan County,14370,Fast Food Restaurant,American Restaurant,Pharmacy,Hotel,Movie Theater,New American Restaurant,Food,Café,Outdoors & Recreation,Coffee Shop,2
21,Clay County,10753,American Restaurant,Fast Food Restaurant,Discount Store,Mexican Restaurant,Pizza Place,BBQ Joint,Pharmacy,Park,Gas Station,Supermarket,2


In [178]:
# (rows, columns) of the cluster-2 selection: one row per county carrying label 2,
# one column per displayed field (columns 0-1 plus column 6 onward).
df_merged2.loc[
    df_merged2['Cluster Labels'].eq(2),
    df_merged2.columns[[0, 1, *range(6, df_merged2.shape[1])]],
].shape

(55, 13)

In [173]:
# Counties labelled cluster 3: keep the first two columns (County, Population)
# and everything from column 6 onward (ranked venue columns + cluster label).
df_merged2.loc[
    df_merged2['Cluster Labels'].eq(3),
    df_merged2.columns[[0, 1, *range(6, df_merged2.shape[1])]],
]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
65,Northampton County,20426,Grocery Store,Discount Store,American Restaurant,Zoo Exhibit,Food & Drink Shop,Financial or Legal Service,Fish Market,Fishing Spot,Flea Market,Flower Shop,3


In [174]:
# Counties labelled cluster 4: keep the first two columns (County, Population)
# and everything from column 6 onward (ranked venue columns + cluster label).
df_merged2.loc[
    df_merged2['Cluster Labels'].eq(4),
    df_merged2.columns[[0, 1, *range(6, df_merged2.shape[1])]],
]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
88,Tyrrell County,4090,Tourist Information Center,Rental Service,Grocery Store,Supermarket,Discount Store,Other Great Outdoors,Coffee Shop,Burger Joint,Harbor / Marina,Flea Market,4


In [175]:
# Counties labelled cluster 5: keep the first two columns (County, Population)
# and everything from column 6 onward (ranked venue columns + cluster label).
df_merged2.loc[
    df_merged2['Cluster Labels'].eq(5),
    df_merged2.columns[[0, 1, *range(6, df_merged2.shape[1])]],
]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
36,Gates County,11601,Discount Store,BBQ Joint,Diner,Juice Bar,Stables,Food & Drink Shop,Financial or Legal Service,Fish Market,Fishing Spot,Flea Market,5


In [176]:
# Counties labelled cluster 6: keep the first two columns (County, Population)
# and everything from column 6 onward (ranked venue columns + cluster label).
df_merged2.loc[
    df_merged2['Cluster Labels'].eq(6),
    df_merged2.columns[[0, 1, *range(6, df_merged2.shape[1])]],
]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
1,Alexander County,37159,Discount Store,Fast Food Restaurant,Gas Station,American Restaurant,Park,Pizza Place,Farmers Market,Electronics Store,Deli / Bodega,Big Box Store,6
3,Anson County,25531,Fast Food Restaurant,Discount Store,Gas Station,Sandwich Place,Ice Cream Shop,Mexican Restaurant,Big Box Store,Breakfast Spot,Fried Chicken Joint,Burger Joint,6
7,Bertie County,19913,Fast Food Restaurant,Discount Store,Pizza Place,Supermarket,Pharmacy,Department Store,Sandwich Place,Mexican Restaurant,Bank,BBQ Joint,6
8,Bladen County,34130,Fast Food Restaurant,Sandwich Place,Discount Store,Pizza Place,Gas Station,Burger Joint,Pharmacy,Grocery Store,Clothing Store,Big Box Store,6
16,Caswell County,22833,Discount Store,Fast Food Restaurant,Supermarket,Gas Station,Tourist Information Center,Park,Campground,Zoo,Italian Restaurant,Sports Club,6
30,Duplin County,59350,Fast Food Restaurant,Discount Store,Sandwich Place,Pizza Place,Pharmacy,Golf Course,Fried Chicken Joint,Supermarket,Gas Station,Grocery Store,6
39,Greene County,21059,Gas Station,Discount Store,Fast Food Restaurant,Supermarket,Italian Restaurant,Pharmacy,Pizza Place,Sandwich Place,Mexican Restaurant,American Restaurant,6
45,Hertford County,24262,Fast Food Restaurant,Pharmacy,Discount Store,Supermarket,Sandwich Place,American Restaurant,Pizza Place,Bank,Café,Coffee Shop,6
58,Martin County,45069,Fast Food Restaurant,Discount Store,Supermarket,Restaurant,Hotel,Steakhouse,Sandwich Place,Pizza Place,Southern / Soul Food Restaurant,Seafood Restaurant,6
61,Montgomery County,27445,Fast Food Restaurant,Discount Store,Gas Station,Sandwich Place,Convenience Store,Pharmacy,Supermarket,State / Provincial Park,Breakfast Spot,Big Box Store,6


In [153]:
# Counties labelled cluster 7: keep the first two columns (County, Population)
# and everything from column 6 onward (ranked venue columns + cluster label).
# NOTE(review): the saved output for this cell has a header but no rows, and its
# execution count (153) is older than the neighbouring cells' - confirm that
# label 7 is still produced by the current clustering run.
df_merged2.loc[
    df_merged2['Cluster Labels'].eq(7),
    df_merged2.columns[[0, 1, *range(6, df_merged2.shape[1])]],
]

Unnamed: 0,County,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
