In [1]:
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
import pandas as pd
import folium
import json
from pandas.io.json import json_normalize
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

# Get a listing of Charlotte neighborhoods from Wikipedia
url = 'https://en.wikipedia.org/wiki/List_of_Charlotte_neighborhoods'
html = requests.get(url).text
soup = BeautifulSoup(html, 'html.parser')

In [2]:
# Create a list to hold Charlotte neighborhoods scraped from site
neighborhoods = []

# List to be ignored when scraping
invalid = ['edit', 
           'Article', 
           'Talk', 
           'Read', 
           'Edit',
           'View history', 'Plaza-Midwood (Charlotte neighborhood)',
           'https://en.wikipedia.org/w/index.php?title=List_of_Charlotte_neighborhoods&oldid=1024225125',
           'What links here', 
           'Related changes', 
           'Permanent link' ,
           'Page information', 
           'Cite this page', 
           'Download as PDF', 
           'Printable version', 
           'Mobile view', 
           'Historic South End']

# Neighborhoods to be added to list that don't match the filter
toadd = ['Dilworth', 'Plaza-Midwood', 'South End']

# Find <a href> tags that also have 'Charlotte_neighborhood' in them
for link in soup.select("a[href*=Charlotte_neighborhood]"):
        
        # Add the link to the list as long as it is no in the invalid list above
        if link.text not in invalid:
            neighborhoods.append(link.text)

# Add the neighborhoods to the list that were mis-tagged on the web page
for item in toadd:
    neighborhoods.append(item)
    

# List the Charlotte Neighborhoods
neighborhoods

['Elizabeth',
 'Eastland',
 'Plaza-Midwood',
 'Sherwood Forest',
 'University City',
 'University City',
 'Derita',
 'Highland Creek',
 'NoDa',
 'Mallard',
 'University City',
 'Ballantyne',
 'Chantilly',
 'Elizabeth',
 'Cotswold',
 'Dilworth',
 'Myers Park',
 'Parkdale',
 'Quail Hollow',
 'Sedgefield',
 'SouthPark',
 'Starmount',
 'Steele Creek',
 'Biddleville',
 'Reid Park',
 'Coulwood',
 'Paw Creek',
 'Dilworth',
 'Plaza-Midwood',
 'South End']

In [3]:
# Remove any duplicates from the list
neighborhoods = list(set(neighborhoods))
neighborhoods

['Dilworth',
 'Cotswold',
 'Coulwood',
 'Myers Park',
 'Highland Creek',
 'Paw Creek',
 'Ballantyne',
 'Mallard',
 'Starmount',
 'Parkdale',
 'Sedgefield',
 'Steele Creek',
 'South End',
 'Sherwood Forest',
 'Quail Hollow',
 'Elizabeth',
 'Eastland',
 'Reid Park',
 'NoDa',
 'University City',
 'Biddleville',
 'Chantilly',
 'SouthPark',
 'Plaza-Midwood',
 'Derita']

In [4]:
# How many neighborhoods are there
print(f"There are {len(neighborhoods)} neighborhoods")

There are 25 neighborhoods


In [5]:
# Initialize lists to hold latitude and longitude for each neighborhood
lat_list = []
long_list = []

# Iterate through the list and find the latitude & longitude for each neighborhood

for item in neighborhoods:
    loc = item + ", Charlotte, NC"
    geolocator = Nominatim(user_agent="my_request")
    location = geolocator.geocode(loc)
    print(item, location.latitude, location.longitude)
    lat_list.append(location.latitude)
    long_list.append(location.longitude)

Dilworth 35.2066117 -80.8509137
Cotswold 35.1759242 -80.7983301
Coulwood 35.30420915 -80.93669320359547
Myers Park 35.1917349 -80.8334894
Highland Creek 35.3866225 -80.76050420405772
Paw Creek 35.263798 -80.916153
Ballantyne 35.0546593 -80.8502463
Mallard 35.3409197 -80.7838899
Starmount 35.1382582 -80.8683834
Parkdale 35.1582874 -80.8456613
Sedgefield 35.192921 -80.8634052
Steele Creek 35.14409795 -80.97896658564757
South End 35.2126229 -80.8588405
Sherwood Forest 35.1709339 -80.7837254
Quail Hollow 35.1186739 -80.8399701
Elizabeth 35.2169699 -80.8275822
Eastland 35.2088208 -80.7513256
Reid Park 35.2109472 -80.9033051
NoDa 35.246994 -80.8057768
University City 35.3171141 -80.7529397
Biddleville 35.2450426 -80.8573483
Chantilly 35.2137544 -80.8100707
SouthPark 35.1484341 -80.8309095
Plaza-Midwood 35.2202706 -80.8108494
Derita 35.2833128 -80.8192863


In [6]:
# Create new empty dataframe
df = pd.DataFrame()

# append columns to an empty DataFrame
df['Neighborhood'] = neighborhoods
df['Latitude'] = lat_list
df['Longitude'] = long_list

# Let me see all the rows of the dataframe when I display it
# instead of the default 30
pd.set_option('display.max_rows', 200)

df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Dilworth,35.206612,-80.850914
1,Cotswold,35.175924,-80.79833
2,Coulwood,35.304209,-80.936693
3,Myers Park,35.191735,-80.833489
4,Highland Creek,35.386623,-80.760504
5,Paw Creek,35.263798,-80.916153
6,Ballantyne,35.054659,-80.850246
7,Mallard,35.34092,-80.78389
8,Starmount,35.138258,-80.868383
9,Parkdale,35.158287,-80.845661


In [7]:
# Get the overall location of Charlotte, NC
address = "Charlotte, NC"

geolocator = Nominatim(user_agent="charlotte_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Charlotte are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Charlotte are 35.2272086, -80.8430827.


In [8]:
# create map of Charlotte
import folium
map_charlotte = folium.Map(location=[latitude, longitude], zoom_start=10)
map_charlotte

In [9]:
# Add the neighborhoods to the map
for lat, lng, neighborhood in zip(
        df['Latitude'], 
        df['Longitude'], 
        df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_charlotte)  

map_charlotte

In [10]:
# Foursquare Credentials
CLIENT_ID = '*** REMOVED ***' # your Foursquare ID
CLIENT_SECRET = '*** REMOVED ***' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

In [11]:
# Test Foursquare access on SouthPark priot to running it against
# all the neighborhoods
southpark_df = df.loc[df['Neighborhood'] == 'SouthPark']
southpark_df

Unnamed: 0,Neighborhood,Latitude,Longitude
22,SouthPark,35.148434,-80.83091


In [12]:
# Create variables for use in Foursquare URL
southpark_latitude = southpark_df['Latitude'].values[0]
southpark_longitude = southpark_df['Longitude'].values[0]
print(southpark_df['Neighborhood'].values[0], southpark_latitude, southpark_longitude)

SouthPark 35.1484341 -80.8309095


In [13]:
# Prepare inputs to retrieve top 100 venues from this first neighborhood from Foursquare
limit = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, southpark_latitude, southpark_longitude, radius, limit)

# Request URL from Foursquare
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60b40410c2be251eaf53a657'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'},
    {'name': '$-$$$$', 'key': 'price'}]},
  'headerLocation': 'Charlotte',
  'headerFullLocation': 'Charlotte',
  'headerLocationGranularity': 'city',
  'totalResults': 86,
  'suggestedBounds': {'ne': {'lat': 35.152934104500005,
    'lng': -80.82541628270204},
   'sw': {'lat': 35.1439340955, 'lng': -80.83640271729797}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '502977e6e4b0f8083ea40f8f',
       'name': 'Whole Foods Market',
       'location': {'address': '6610 Fairview Rd',
        'crossStreet': 'Sharon Rd.',
        'lat': 35.14751261963071,
        'lng': -80.83007642282878,
        '

In [14]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [15]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Whole Foods Market,Grocery Store,35.147513,-80.830076
1,Original Pancake House,Breakfast Spot,35.148328,-80.833284
2,HomeGoods,Furniture / Home Store,35.14732,-80.832516
3,Cafe Monte,French Restaurant,35.147154,-80.828726
4,Oak Steakhouse,Steakhouse,35.146634,-80.831118


In [16]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

86 venues were returned by Foursquare.


In [17]:
# create a function to repeat the same process to all the neighborhoods in Charlotte
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [18]:
# Run the above function on each neighborhood and create a new dataframe called charlotte_venues
charlotte_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Dilworth
Cotswold
Coulwood
Myers Park
Highland Creek
Paw Creek
Ballantyne
Mallard
Starmount
Parkdale
Sedgefield
Steele Creek
South End
Sherwood Forest
Quail Hollow
Elizabeth
Eastland
Reid Park
NoDa
University City
Biddleville
Chantilly
SouthPark
Plaza-Midwood
Derita


In [19]:
# Check the size of this dataframe
print(charlotte_venues.shape)

charlotte_venues.head()

(394, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Dilworth,35.206612,-80.850914,Latta Park,35.209426,-80.85029,Park
1,Dilworth,35.206612,-80.850914,Zen Massage Charlotte,35.204548,-80.847654,Massage Studio
2,Dilworth,35.206612,-80.850914,PNC Bank,35.204583,-80.848619,Bank
3,Cotswold,35.175924,-80.79833,Krispy Kreme Doughnuts,35.17638,-80.797583,Donut Shop
4,Cotswold,35.175924,-80.79833,The Pizza Peel and Tap Room,35.176726,-80.797906,Pizza Place


In [20]:
# How many venues per neighborhood
charlotte_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ballantyne,58,58,58,58,58,58
Biddleville,3,3,3,3,3,3
Chantilly,6,6,6,6,6,6
Cotswold,44,44,44,44,44,44
Coulwood,5,5,5,5,5,5
Derita,1,1,1,1,1,1
Dilworth,3,3,3,3,3,3
Eastland,3,3,3,3,3,3
Elizabeth,24,24,24,24,24,24
Highland Creek,1,1,1,1,1,1


In [21]:
# How many unique categories from the returned venues?
print('There are {} uniques categories.'.format(len(charlotte_venues['Venue Category'].unique())))

There are 150 uniques categories.


In [22]:
# Analyze each neighborhood

# one hot encoding
charlotte_onehot = pd.get_dummies(charlotte_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
charlotte_onehot['Neighborhood'] = charlotte_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [charlotte_onehot.columns[-1]] + list(charlotte_onehot.columns[:-1])
charlotte_onehot = charlotte_onehot[fixed_columns]

charlotte_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Arcade,Arts & Crafts Store,Asian Restaurant,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,...,Track,Tram Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Warehouse Store,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Dilworth,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Dilworth,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Dilworth,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Cotswold,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Cotswold,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# New dataframe size
charlotte_onehot.shape

(394, 151)

In [24]:
# Group rows by neighborhood and take the mean of the frequency of occurrence of each category
charlotte_grouped = charlotte_onehot.groupby('Neighborhood').mean().reset_index()
charlotte_grouped

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Arcade,Arts & Crafts Store,Asian Restaurant,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,...,Track,Tram Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Warehouse Store,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Ballantyne,0.0,0.034483,0.0,0.0,0.051724,0.017241,0.017241,0.017241,0.034483,...,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0
1,Biddleville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Chantilly,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Cotswold,0.0,0.0,0.0,0.022727,0.0,0.0,0.0,0.022727,0.022727,...,0.0,0.0,0.0,0.022727,0.022727,0.0,0.0,0.0,0.0,0.022727
4,Coulwood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Derita,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Dilworth,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Eastland,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Elizabeth,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Highland Creek,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
# Confirm new size
charlotte_grouped.shape

(24, 151)

In [26]:
# Print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in charlotte_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = charlotte_grouped[charlotte_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Ballantyne----
              venue  freq
0       Pizza Place  0.07
1  Asian Restaurant  0.05
2       Coffee Shop  0.03
3               Pub  0.03
4               Spa  0.03


----Biddleville----
                     venue  freq
0                     Café  0.33
1      Fried Chicken Joint  0.33
2                    Beach  0.33
3        Accessories Store  0.00
4  New American Restaurant  0.00


----Chantilly----
                venue  freq
0              Arcade  0.33
1  Light Rail Station  0.17
2           Nightclub  0.17
3      Nightlife Spot  0.17
4          Food Stand  0.17


----Cotswold----
                    venue  freq
0             Pizza Place  0.07
1          Cosmetics Shop  0.07
2  Furniture / Home Store  0.05
3    Fast Food Restaurant  0.05
4                    Bank  0.05


----Coulwood----
                             venue  freq
0                   Baseball Field   0.2
1                      Gas Station   0.2
2                        Gastropub   0.2
3                      

In [27]:
# Function to sort venues into descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
# Create the new dataframe and display the top 10 venues for each neighborhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = charlotte_grouped['Neighborhood']

for ind in np.arange(charlotte_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(charlotte_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Ballantyne,Pizza Place,Asian Restaurant,Coffee Shop,Pub,Spa,Mobile Phone Shop,Indian Restaurant,Bakery,American Restaurant,Burger Joint
1,Biddleville,Café,Fried Chicken Joint,Beach,Accessories Store,New American Restaurant,Nightlife Spot,Noodle House,Optical Shop,Park,Performing Arts Venue
2,Chantilly,Arcade,Light Rail Station,Nightclub,Nightlife Spot,Food Stand,Accessories Store,Noodle House,Optical Shop,Park,Performing Arts Venue
3,Cotswold,Pizza Place,Cosmetics Shop,Furniture / Home Store,Fast Food Restaurant,Bank,Pharmacy,Supermarket,Coffee Shop,Convenience Store,Department Store
4,Coulwood,Baseball Field,Gas Station,Gastropub,Pharmacy,Southern / Soul Food Restaurant,Accessories Store,Nightclub,Nightlife Spot,Noodle House,Optical Shop
5,Derita,Garden Center,Accessories Store,Performing Arts Venue,New American Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Peruvian Restaurant
6,Dilworth,Massage Studio,Park,Bank,Accessories Store,Peruvian Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Performing Arts Venue
7,Eastland,Convenience Store,Gym / Fitness Center,Bank,Accessories Store,Pet Store,Nightlife Spot,Noodle House,Optical Shop,Park,Performing Arts Venue
8,Elizabeth,New American Restaurant,Park,Stadium,Bike Shop,Rock Club,Sandwich Place,Deli / Bodega,Pizza Place,Peruvian Restaurant,College Theater
9,Highland Creek,Basketball Court,Accessories Store,Peruvian Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Performing Arts Venue,Pet Store


In [29]:
# Cluster Neighborhoods

# set number of clusters
# Having tried kclusters at 7 and 10 which had most of the neighborhoods in one category
# I found that 15 works well for this exercise, giving a large enough cluster to focus on 
# for further processing
kclusters = 15

charlotte_grouped_clustering = charlotte_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(charlotte_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:15] 

array([ 2, 14, 12,  2,  2,  1,  9, 11,  2,  5,  3,  0,  2,  2,  2],
      dtype=int32)

In [30]:
# Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

charlotte_merged = df

# merge charlotte_grouped with charlotte_data to add latitude/longitude for each neighborhood
charlotte_merged = charlotte_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

# Paw Creek has NaN entries so drop it from the dataframe
charlotte_merged.dropna(inplace=True)

# convert the 'Cluster Labels' column to integer otherwise mapping colors don't work
charlotte_merged['Cluster Labels'] = charlotte_merged['Cluster Labels'].astype(int)

charlotte_merged # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Dilworth,35.206612,-80.850914,9,Massage Studio,Park,Bank,Accessories Store,Peruvian Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Performing Arts Venue
1,Cotswold,35.175924,-80.79833,2,Pizza Place,Cosmetics Shop,Furniture / Home Store,Fast Food Restaurant,Bank,Pharmacy,Supermarket,Coffee Shop,Convenience Store,Department Store
2,Coulwood,35.304209,-80.936693,2,Baseball Field,Gas Station,Gastropub,Pharmacy,Southern / Soul Food Restaurant,Accessories Store,Nightclub,Nightlife Spot,Noodle House,Optical Shop
3,Myers Park,35.191735,-80.833489,0,Bagel Shop,Financial or Legal Service,College Auditorium,Track,Accessories Store,Pet Store,Nightlife Spot,Noodle House,Optical Shop,Park
4,Highland Creek,35.386623,-80.760504,5,Basketball Court,Accessories Store,Peruvian Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Performing Arts Venue,Pet Store
6,Ballantyne,35.054659,-80.850246,2,Pizza Place,Asian Restaurant,Coffee Shop,Pub,Spa,Mobile Phone Shop,Indian Restaurant,Bakery,American Restaurant,Burger Joint
7,Mallard,35.34092,-80.78389,3,Plaza,Accessories Store,Performing Arts Venue,New American Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Peruvian Restaurant
8,Starmount,35.138258,-80.868383,7,Pool,Accessories Store,Performing Arts Venue,New American Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Peruvian Restaurant
9,Parkdale,35.158287,-80.845661,2,Spa,Grocery Store,Italian Restaurant,American Restaurant,Salon / Barbershop,Mexican Restaurant,Bar,Gym,Massage Studio,Flower Shop
10,Sedgefield,35.192921,-80.863405,10,Speakeasy,Food,Accessories Store,Performing Arts Venue,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Peruvian Restaurant


In [31]:
# Visualize the resulting clusters

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(charlotte_merged['Latitude'], charlotte_merged['Longitude'], charlotte_merged['Neighborhood'], charlotte_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [32]:
# Examine Clusters

# Cluster 1
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 0, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Myers Park,0,Bagel Shop,Financial or Legal Service,College Auditorium,Track,Accessories Store,Pet Store,Nightlife Spot,Noodle House,Optical Shop,Park


In [33]:
# Cluster 2
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 1, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
24,Derita,1,Garden Center,Accessories Store,Performing Arts Venue,New American Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Peruvian Restaurant


In [34]:
# Cluster 3
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 2, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Cotswold,2,Pizza Place,Cosmetics Shop,Furniture / Home Store,Fast Food Restaurant,Bank,Pharmacy,Supermarket,Coffee Shop,Convenience Store,Department Store
2,Coulwood,2,Baseball Field,Gas Station,Gastropub,Pharmacy,Southern / Soul Food Restaurant,Accessories Store,Nightclub,Nightlife Spot,Noodle House,Optical Shop
6,Ballantyne,2,Pizza Place,Asian Restaurant,Coffee Shop,Pub,Spa,Mobile Phone Shop,Indian Restaurant,Bakery,American Restaurant,Burger Joint
9,Parkdale,2,Spa,Grocery Store,Italian Restaurant,American Restaurant,Salon / Barbershop,Mexican Restaurant,Bar,Gym,Massage Studio,Flower Shop
12,South End,2,Coffee Shop,American Restaurant,Bakery,Thai Restaurant,Bar,Restaurant,Pizza Place,Asian Restaurant,French Restaurant,Furniture / Home Store
15,Elizabeth,2,New American Restaurant,Park,Stadium,Bike Shop,Rock Club,Sandwich Place,Deli / Bodega,Pizza Place,Peruvian Restaurant,College Theater
18,NoDa,2,Bar,Brewery,Gastropub,Sports Bar,Food & Drink Shop,Gym / Fitness Center,Concert Hall,Performing Arts Venue,Gift Shop,Dessert Shop
19,University City,2,Brewery,Warehouse Store,Furniture / Home Store,Gym / Fitness Center,Music Store,Pool,Plaza,Pizza Place,Pool Hall,Pharmacy
22,SouthPark,2,Clothing Store,Women's Store,American Restaurant,Jewelry Store,Coffee Shop,Department Store,Boutique,Cosmetics Shop,Accessories Store,Lingerie Store
23,Plaza-Midwood,2,Bar,Pizza Place,Gastropub,Brewery,Pool Hall,Dive Bar,Food Truck,Southern / Soul Food Restaurant,Deli / Bodega,Spa


In [35]:
# Cluster 4
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 3, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Mallard,3,Plaza,Accessories Store,Performing Arts Venue,New American Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Peruvian Restaurant


In [36]:
# Cluster 5
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 4, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Steele Creek,4,Department Store,Peruvian Restaurant,New American Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Performing Arts Venue,Accessories Store


In [37]:
# Cluster 6
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 5, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Highland Creek,5,Basketball Court,Accessories Store,Peruvian Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Performing Arts Venue,Pet Store


In [38]:
# Cluster 7
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 6, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Quail Hollow,6,Golf Course,Golf Driving Range,Accessories Store,Music Store,New American Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park


In [39]:
# Cluster 8
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 7, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Starmount,7,Pool,Accessories Store,Performing Arts Venue,New American Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Peruvian Restaurant


In [40]:
# Cluster 9
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 8, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Reid Park,8,Intersection,Discount Store,Fried Chicken Joint,Accessories Store,Peruvian Restaurant,Nightlife Spot,Noodle House,Optical Shop,Park,Performing Arts Venue


In [41]:
# Cluster 10
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 9, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Dilworth,9,Massage Studio,Park,Bank,Accessories Store,Peruvian Restaurant,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Performing Arts Venue


In [42]:
# Cluster 11
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 10, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Sedgefield,10,Speakeasy,Food,Accessories Store,Performing Arts Venue,Nightclub,Nightlife Spot,Noodle House,Optical Shop,Park,Peruvian Restaurant


In [43]:
# Cluster 12
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 11, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Eastland,11,Convenience Store,Gym / Fitness Center,Bank,Accessories Store,Pet Store,Nightlife Spot,Noodle House,Optical Shop,Park,Performing Arts Venue


In [44]:
# Cluster 13
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 12, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Chantilly,12,Arcade,Light Rail Station,Nightclub,Nightlife Spot,Food Stand,Accessories Store,Noodle House,Optical Shop,Park,Performing Arts Venue


In [45]:
# Cluster 14
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 13, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Sherwood Forest,13,Business Service,Convenience Store,Accessories Store,Pet Store,Nightlife Spot,Noodle House,Optical Shop,Park,Performing Arts Venue,Peruvian Restaurant


In [46]:
# Cluster 15
charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 14, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,Biddleville,14,Café,Fried Chicken Joint,Beach,Accessories Store,New American Restaurant,Nightlife Spot,Noodle House,Optical Shop,Park,Performing Arts Venue


In [47]:
# This appears to be the best cluster for our purposes
charlotte_cluster_venues = charlotte_merged.loc[charlotte_merged['Cluster Labels'] == 2, charlotte_merged.columns[[0] + list(range(3, charlotte_merged.shape[1]))]]
charlotte_cluster_venues

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Cotswold,2,Pizza Place,Cosmetics Shop,Furniture / Home Store,Fast Food Restaurant,Bank,Pharmacy,Supermarket,Coffee Shop,Convenience Store,Department Store
2,Coulwood,2,Baseball Field,Gas Station,Gastropub,Pharmacy,Southern / Soul Food Restaurant,Accessories Store,Nightclub,Nightlife Spot,Noodle House,Optical Shop
6,Ballantyne,2,Pizza Place,Asian Restaurant,Coffee Shop,Pub,Spa,Mobile Phone Shop,Indian Restaurant,Bakery,American Restaurant,Burger Joint
9,Parkdale,2,Spa,Grocery Store,Italian Restaurant,American Restaurant,Salon / Barbershop,Mexican Restaurant,Bar,Gym,Massage Studio,Flower Shop
12,South End,2,Coffee Shop,American Restaurant,Bakery,Thai Restaurant,Bar,Restaurant,Pizza Place,Asian Restaurant,French Restaurant,Furniture / Home Store
15,Elizabeth,2,New American Restaurant,Park,Stadium,Bike Shop,Rock Club,Sandwich Place,Deli / Bodega,Pizza Place,Peruvian Restaurant,College Theater
18,NoDa,2,Bar,Brewery,Gastropub,Sports Bar,Food & Drink Shop,Gym / Fitness Center,Concert Hall,Performing Arts Venue,Gift Shop,Dessert Shop
19,University City,2,Brewery,Warehouse Store,Furniture / Home Store,Gym / Fitness Center,Music Store,Pool,Plaza,Pizza Place,Pool Hall,Pharmacy
22,SouthPark,2,Clothing Store,Women's Store,American Restaurant,Jewelry Store,Coffee Shop,Department Store,Boutique,Cosmetics Shop,Accessories Store,Lingerie Store
23,Plaza-Midwood,2,Bar,Pizza Place,Gastropub,Brewery,Pool Hall,Dive Bar,Food Truck,Southern / Soul Food Restaurant,Deli / Bodega,Spa


In [48]:
# List of the neighborhoods in the preferred cluster
cluster_list = []
cluster_list = list(charlotte_cluster_venues['Neighborhood'])
print(cluster_list)
print(f"Number of Neighborhoods in the cluster: {len(cluster_list)}")

['Cotswold', 'Coulwood', 'Ballantyne', 'Parkdale', 'South End', 'Elizabeth', 'NoDa', 'University City', 'SouthPark', 'Plaza-Midwood']
Number of Neighborhoods in the cluster: 10


In [49]:
# Create a filter so that only venues containing 'Restaurant' will be utilized
restaurants_mask = charlotte_venues['Venue Category'].str.contains("Restaurant")
restaurants_mask

0      False
1      False
2      False
3      False
4      False
       ...  
389    False
390    False
391     True
392    False
393    False
Name: Venue Category, Length: 394, dtype: bool

In [50]:
# Apply the mask and create a new dataframe
charlotte_restaurants = charlotte_venues[restaurants_mask]
print(charlotte_restaurants.shape)
charlotte_restaurants

(71, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
5,Cotswold,35.175924,-80.79833,Chick-fil-A,35.17717,-80.799032,Fast Food Restaurant
9,Cotswold,35.175924,-80.79833,Koishi Sushi Bar And Fine Chinese,35.176099,-80.797043,Sushi Restaurant
12,Cotswold,35.175924,-80.79833,Shun Lee Palace,35.177464,-80.798581,Chinese Restaurant
37,Cotswold,35.175924,-80.79833,Salsarita's Fresh Mexican Grill,35.175956,-80.802217,Mexican Restaurant
41,Cotswold,35.175924,-80.79833,Bojangles' Famous Chicken 'n Biscuits,35.176792,-80.798618,Fast Food Restaurant
47,Coulwood,35.304209,-80.936693,Heirloom,35.305047,-80.937486,Southern / Soul Food Restaurant
60,Ballantyne,35.054659,-80.850246,The Blue Taj,35.053394,-80.851724,Indian Restaurant
63,Ballantyne,35.054659,-80.850246,Jade Asian Fusion,35.05358,-80.851483,Asian Restaurant
67,Ballantyne,35.054659,-80.850246,Gallery Restaurant,35.056759,-80.848272,American Restaurant
70,Ballantyne,35.054659,-80.850246,Sheng Ramen,35.053668,-80.846876,Asian Restaurant


In [51]:
# We can drop columns we don't need
charlotte_restaurants.drop(['Neighborhood Latitude', 'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude'], axis=1, inplace=True)
charlotte_restaurants

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,Neighborhood,Venue Category
5,Cotswold,Fast Food Restaurant
9,Cotswold,Sushi Restaurant
12,Cotswold,Chinese Restaurant
37,Cotswold,Mexican Restaurant
41,Cotswold,Fast Food Restaurant
47,Coulwood,Southern / Soul Food Restaurant
60,Ballantyne,Indian Restaurant
63,Ballantyne,Asian Restaurant
67,Ballantyne,American Restaurant
70,Ballantyne,Asian Restaurant


In [52]:
charlotte_cluster_restaurants = charlotte_restaurants.loc[charlotte_restaurants['Neighborhood'].isin(cluster_list)]
charlotte_cluster_restaurants

Unnamed: 0,Neighborhood,Venue Category
5,Cotswold,Fast Food Restaurant
9,Cotswold,Sushi Restaurant
12,Cotswold,Chinese Restaurant
37,Cotswold,Mexican Restaurant
41,Cotswold,Fast Food Restaurant
47,Coulwood,Southern / Soul Food Restaurant
60,Ballantyne,Indian Restaurant
63,Ballantyne,Asian Restaurant
67,Ballantyne,American Restaurant
70,Ballantyne,Asian Restaurant


In [53]:
# We discover that 'Fast Food Restaurant' is popular, but we don't
# want to condifer that cuisine, so rmove it from the Dataframe
charlotte_cluster_restaurants = charlotte_cluster_restaurants[charlotte_cluster_restaurants['Venue Category'] != "Fast Food Restaurant"]
charlotte_cluster_restaurants

Unnamed: 0,Neighborhood,Venue Category
9,Cotswold,Sushi Restaurant
12,Cotswold,Chinese Restaurant
37,Cotswold,Mexican Restaurant
47,Coulwood,Southern / Soul Food Restaurant
60,Ballantyne,Indian Restaurant
63,Ballantyne,Asian Restaurant
67,Ballantyne,American Restaurant
70,Ballantyne,Asian Restaurant
78,Ballantyne,Greek Restaurant
81,Ballantyne,Tex-Mex Restaurant


In [54]:
charlotte_cluster_restaurants.size

132

In [55]:
# Get a unique list of Restaurant Types/Cuisines
rest_list = list(set(charlotte_cluster_restaurants["Venue Category"]))
rest_list

['Ramen Restaurant',
 'Greek Restaurant',
 'New American Restaurant',
 'Vegetarian / Vegan Restaurant',
 'Mexican Restaurant',
 'Japanese Restaurant',
 'Peruvian Restaurant',
 'Thai Restaurant',
 'Indian Restaurant',
 'Italian Restaurant',
 'Asian Restaurant',
 'Chinese Restaurant',
 'Seafood Restaurant',
 'Eastern European Restaurant',
 'Sushi Restaurant',
 'French Restaurant',
 'Southern / Soul Food Restaurant',
 'Tapas Restaurant',
 'Restaurant',
 'Caribbean Restaurant',
 'Tex-Mex Restaurant',
 'American Restaurant']

In [56]:
# As we are focusing only on restaurants let's change the name of the 'Venue Category' column
# to 'Restaurant'
charlotte_cluster_restaurants.rename(columns={"Venue Category": "Cuisine"}, inplace=True)
charlotte_cluster_restaurants

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


Unnamed: 0,Neighborhood,Cuisine
9,Cotswold,Sushi Restaurant
12,Cotswold,Chinese Restaurant
37,Cotswold,Mexican Restaurant
47,Coulwood,Southern / Soul Food Restaurant
60,Ballantyne,Indian Restaurant
63,Ballantyne,Asian Restaurant
67,Ballantyne,American Restaurant
70,Ballantyne,Asian Restaurant
78,Ballantyne,Greek Restaurant
81,Ballantyne,Tex-Mex Restaurant


In [57]:
charlotte_cluster_restaurants.size

132

In [58]:
# Create a crosstab table to get counts foreach cuisine in each neighborhood
pd.crosstab(charlotte_cluster_restaurants.Cuisine, charlotte_cluster_restaurants.Neighborhood, margins=True)

Neighborhood,Ballantyne,Cotswold,Coulwood,Elizabeth,NoDa,Parkdale,Plaza-Midwood,South End,SouthPark,All
Cuisine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
American Restaurant,2,0,0,0,0,1,1,3,5,12
Asian Restaurant,3,0,0,0,0,0,1,2,0,6
Caribbean Restaurant,0,0,0,0,0,0,1,0,0,1
Chinese Restaurant,1,1,0,1,0,0,1,1,2,7
Eastern European Restaurant,0,0,0,0,1,0,0,0,0,1
French Restaurant,0,0,0,0,0,0,0,1,1,2
Greek Restaurant,1,0,0,1,0,0,0,0,1,3
Indian Restaurant,2,0,0,0,0,0,0,1,0,3
Italian Restaurant,1,0,0,0,0,1,0,1,1,4
Japanese Restaurant,0,0,0,0,0,0,0,0,1,1


In [59]:
# Group the Cusines so we can see what the most popular cusine types are
popular_restaurants = charlotte_cluster_restaurants.groupby('Cuisine').count()
popular_restaurants

Unnamed: 0_level_0,Neighborhood
Cuisine,Unnamed: 1_level_1
American Restaurant,12
Asian Restaurant,6
Caribbean Restaurant,1
Chinese Restaurant,7
Eastern European Restaurant,1
French Restaurant,2
Greek Restaurant,3
Indian Restaurant,3
Italian Restaurant,4
Japanese Restaurant,1


In [60]:
# sort the Cusines into descending order of popularity
popular_restaurants.sort_values(by=['Neighborhood'], ascending=False, inplace=True)
popular_restaurants

Unnamed: 0_level_0,Neighborhood
Cuisine,Unnamed: 1_level_1
American Restaurant,12
Chinese Restaurant,7
Asian Restaurant,6
Mexican Restaurant,6
Italian Restaurant,4
New American Restaurant,4
Greek Restaurant,3
Indian Restaurant,3
Restaurant,3
Southern / Soul Food Restaurant,3


In [61]:
# What are the top 5 cuisines that we should consider
top5 = popular_restaurants.head(5).reset_index(drop=False)
top5

Unnamed: 0,Cuisine,Neighborhood
0,American Restaurant,12
1,Chinese Restaurant,7
2,Asian Restaurant,6
3,Mexican Restaurant,6
4,Italian Restaurant,4


In [62]:
# initialize a list to hold the top5 cuisines
top5_list = []

for index, row in top5.iterrows():
    top5_list.append(row['Cuisine'])

top5_list

['American Restaurant',
 'Chinese Restaurant',
 'Asian Restaurant',
 'Mexican Restaurant',
 'Italian Restaurant']

In [63]:
# Create new Dataframe with neighborhoods and top5 cuisines
table = charlotte_cluster_restaurants.loc[charlotte_cluster_restaurants['Cuisine'].isin(top5_list)]
table

Unnamed: 0,Neighborhood,Cuisine
12,Cotswold,Chinese Restaurant
37,Cotswold,Mexican Restaurant
63,Ballantyne,Asian Restaurant
67,Ballantyne,American Restaurant
70,Ballantyne,Asian Restaurant
83,Ballantyne,Italian Restaurant
85,Ballantyne,Chinese Restaurant
89,Ballantyne,Asian Restaurant
98,Ballantyne,American Restaurant
103,Ballantyne,Mexican Restaurant


In [64]:
# Create a crosstab table to get counts foreach cuisine in each neighborhood
pd.crosstab(table.Cuisine, table.Neighborhood, margins=True)

Neighborhood,Ballantyne,Cotswold,Elizabeth,Parkdale,Plaza-Midwood,South End,SouthPark,All
Cuisine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
American Restaurant,2,0,0,1,1,3,5,12
Asian Restaurant,3,0,0,0,1,2,0,6
Chinese Restaurant,1,1,1,0,1,1,2,7
Italian Restaurant,1,0,0,1,0,1,1,4
Mexican Restaurant,1,1,1,1,0,1,1,6
All,8,2,2,3,3,8,9,35


In [65]:
cuisine_count = table.groupby('Cuisine').count()
cuisine_count.sort_values(by=['Neighborhood'], ascending=False, inplace=True)
cuisine_count

Unnamed: 0_level_0,Neighborhood
Cuisine,Unnamed: 1_level_1
American Restaurant,12
Chinese Restaurant,7
Asian Restaurant,6
Mexican Restaurant,6
Italian Restaurant,4


In [66]:
neighborhood_count = table.groupby('Neighborhood').count()
neighborhood_count.sort_values(by=['Cuisine'], ascending=False, inplace=True)
neighborhood_count

Unnamed: 0_level_0,Cuisine
Neighborhood,Unnamed: 1_level_1
SouthPark,9
Ballantyne,8
South End,8
Parkdale,3
Plaza-Midwood,3
Cotswold,2
Elizabeth,2
