In [1]:
# Import libraries needed to perform the analysis

import pandas as pd
import json

import numpy as np

import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium


print('Libraries imported.')


Libraries imported.


In [2]:
pip install openpyxl

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Read neighborhoods data from a json file obtained from Zillow

with open('zillow_neighborhoods_dc.geojson') as json_data:
    dc_geo_data = json.load(json_data)

In [4]:
# Display features available in the json file

neighborhoods_data = dc_geo_data['features']
neighborhoods_data[0]

{'type': 'Feature',
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-76.92404850292002, 38.89834869707605],
    [-76.92302802082145, 38.89834269424013],
    [-76.92212086232087, 38.89834612522238],
    [-76.92189795296916, 38.898348505664984],
    [-76.92189495004052, 38.89797630300189],
    [-76.92188944255929, 38.89758435772628],
    [-76.92188091490071, 38.897192085432785],
    [-76.92187710136669, 38.89676158314599],
    [-76.92187184127575, 38.89605673095374],
    [-76.9218744310906, 38.895508685836624],
    [-76.9218851754648, 38.894688245352],
    [-76.92189, 38.894640000000116],
    [-76.92188, 38.89439],
    [-76.92186, 38.8939],
    [-76.92199841088183, 38.89346499437132],
    [-76.92214858787968, 38.89299300952101],
    [-76.92249951346562, 38.891987135269645],
    [-76.92265770808783, 38.891542973445695],
    [-76.92279807734857, 38.891120639654474],
    [-76.92291, 38.89076],
    [-76.92290355748698, 38.89045720188975],
    [-76.92289522920694, 38.890065772727866],
  

In [5]:
# define columns in a new dataframe

column_names = ['Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe

neighborhoods = pd.DataFrame(columns=column_names)

In [6]:
# Populate the new dataframe one neighborhood at a time

for data in neighborhoods_data:
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['properties']['geo_point_2d']
    neighborhood_lat = neighborhood_latlon[0]
    neighborhood_lon = neighborhood_latlon[1]
    
    neighborhoods = neighborhoods.append({'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [7]:
# View the first five rows

neighborhoods.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Lincoln Heights,38.89386,-76.924675
1,Kenilworth,38.91293,-76.940925
2,Bellevue,38.820993,-77.013458
3,Kalorama,38.916362,-77.051122
4,Barnaby Woods,38.974894,-77.057115


In [8]:
# Use folium to create a map of Washington using latitude and longitude values

map_dc = folium.Map(location=[38.900497, -77.007507], zoom_start=12)

map_dc.choropleth(
        geo_data = dc_geo_data, 
               fill_color = 'Y10rRd',
               fill_opacity = 0.3,
              # line_opacity = 0.2
               )

display(map_dc)

In [9]:
# Add markers to map to indicate and identify the center of each neighborhood and display the revised map

for lat, lng, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_dc)  
    
map_dc

In [10]:
# Enter my Foursquare credentials

CLIENT_ID = 'DH0VYTQ1N4ZPC404HKK3SSEMRO4NFHBOWKNYIODJ40JCTASM' # your Foursquare ID
CLIENT_SECRET = 'ZIYY4JNSPF0AABCQ24HJ1EIKYKNMKPGIJ2SCNGXHHRTWSRGC' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('My credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

My credentails:
CLIENT_ID: DH0VYTQ1N4ZPC404HKK3SSEMRO4NFHBOWKNYIODJ40JCTASM
CLIENT_SECRET:ZIYY4JNSPF0AABCQ24HJ1EIKYKNMKPGIJ2SCNGXHHRTWSRGC


In [11]:
# Pull out one DC neighborhood to use in developing a search method for venues by category

neighborhoods.loc[0, 'Neighborhood']

'Lincoln Heights'

In [12]:
# Retrieve location coordinates to use in the venue search

neighborhood_lat = neighborhoods.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_long = neighborhoods.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighborhoods.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_lat, 
                                                               neighborhood_long))

Latitude and longitude values of Lincoln Heights are 38.8938599263775, -76.92467477213681.


In [13]:
# Set parameters for a search of the trial DC neighborhood for local venues by category

radius = 1000

url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}'.format(CLIENT_ID, CLIENT_SECRET, neighborhood_lat, neighborhood_long, VERSION, radius)


In [14]:
# Examine contents and format of Foursquare file

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ed7b611c546f3001b028b54'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Marshall Heights - Lincoln Heights',
  'headerFullLocation': 'Marshall Heights - Lincoln Heights, Washington',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 13,
  'suggestedBounds': {'ne': {'lat': 38.90285993537751,
    'lng': -76.91313284169549},
   'sw': {'lat': 38.88485991737749, 'lng': -76.93621670257814}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '55b7f53e498ee3c6e9440373',
       'name': '7-Eleven',
       'location': {'address': '4854 Nannie Burroughs Ave',
        'lat': 38.899090792437654,
        'lng': -76.93203762173653,
        'labe

In [15]:
# Define a function that extracts the category of each venue

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [16]:
# List venues by category and location within the sample neighborhood

venues = results["response"]['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  """


Unnamed: 0,name,categories,lat,lng
0,7-Eleven,Convenience Store,38.899091,-76.932038
1,Subway,Sandwich Place,38.899208,-76.933505
2,Marvin Gaye Recreational Park,Park,38.893473,-76.924769
3,China Cafe,Chinese Restaurant,38.897728,-76.926007
4,East Capital Carryout,Café,38.889439,-76.925284


In [17]:
# Determine how many total venues are located in the sample neighborhood

print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

13 venues were returned by Foursquare.


In [18]:
# Define a function to compile venues by category for all DC neighborhoods

def getNearbyVenues(names, latitudes, longitudes, radius=1000, limit=200):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius,
            limit
            )
            
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
               
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,7-Eleven,Convenience Store,38.899091,-76.932038
1,Subway,Sandwich Place,38.899208,-76.933505
2,Marvin Gaye Recreational Park,Park,38.893473,-76.924769
3,China Cafe,Chinese Restaurant,38.897728,-76.926007
4,East Capital Carryout,Café,38.889439,-76.925284


In [20]:
# Apply the function to generate a list of all DC venues by neighborhood

dc_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )



Lincoln Heights
Kenilworth
Bellevue
Kalorama
Barnaby Woods
Gallaudet
Pleasant Plains
National Mall - West Potomac Park
Fort Davis
Benning
Brightwood
Potomac Heights
Columbia Heights
U Street Corridor
Chevy Chase
Buena Vista
Anacostia Naval Station - Boiling Air Force Base
Capitol Hill
Benning Ridge
Southwest Federal Center
Civic Betterment
Forest Hills
Takoma
Chinatown
Gateway
Congress Heights
Saint Elizabeths
Manor Park
The Palisades
Ledroit Park
Mahaning Heights
Deanwood
Tenleytown
Petworth
Mount Pleasant
Foggy Bottom
Shipley Terrace
Colonial Village
Fairlawn
Friendship Heights
Michigan Park
Burleith
Barney Circle
Penn Quarter
Glover Park
Theodore Roosevelt Island
Shipley Terrace
Sixteenth Street Heights
Dupont Circle
Woodlands
Brightwood Park
Woodley Park
Fort Dupont
Stronghold
Benning Heights
Foxhall
Massachusetts Heights
American University Park
Marshall Heights
Shaw
North Michigan Park
Southwest Waterfront
Langdon
Mayfair
Anacostia
Georgetown
Kent
Mount Vernon Square
Judiciary Sq

In [21]:
# Determine how many total establishments are located in DC, and display the first five

print(dc_venues.shape)
dc_venues.head()

(6619, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Lincoln Heights,38.89386,-76.924675,7-Eleven,38.899091,-76.932038,Convenience Store
1,Lincoln Heights,38.89386,-76.924675,Subway,38.899208,-76.933505,Sandwich Place
2,Lincoln Heights,38.89386,-76.924675,Marvin Gaye Recreational Park,38.893473,-76.924769,Park
3,Lincoln Heights,38.89386,-76.924675,China Cafe,38.897728,-76.926007,Chinese Restaurant
4,Lincoln Heights,38.89386,-76.924675,East Capital Carryout,38.889439,-76.925284,Café


In [22]:
# Display the number of establishments in each neighborhood

total_by_neighborhood = dc_venues.groupby('Neighborhood').count()
#total_by_neighborhood
total_by_neighborhood.sort_values(by = ['Venue'], inplace = True, ascending = False)

most_venues = total_by_neighborhood['Venue']
#print(most_venues)
most_venues

Neighborhood
Adams Morgan                   100
Columbia Heights               100
Dupont Circle                  100
Eckington                      100
Foggy Bottom                   100
                              ... 
Barnaby Woods                    7
Deanwood                         7
Saint Elizabeths                 7
Blue Plains Treatment Plant      5
Colonial Village                 3
Name: Venue, Length: 135, dtype: int64

In [23]:
# Determine how many unique categories are present

print('There are {} unique categories.'.format(len(dc_venues['Venue Category'].unique())))

There are 343 unique categories.


In [24]:
# Use one hot encoding to assign dummy variables to neighborhoods based on presence or absence of venue category

dc_onehot = pd.get_dummies(dc_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
dc_onehot['Neighborhood'] = dc_venues['Neighborhood'] 
dc_onehot.head()

# move neighborhood column to the first column
#fixed_columns = [dc_onehot.columns[-1]] + list(dc_onehot.columns[:-1])
#dc_onehot = dc_onehot[fixed_columns]

#dc_onehot.head()

Unnamed: 0,Accessories Store,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
# Check to ensure that all establishments and categories are present

dc_onehot.shape

(6619, 343)

In [26]:
# Tabulate frequency of venue presence by neighborhood

dc_grouped = dc_onehot.groupby('Neighborhood').mean().reset_index()
dc_grouped

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,African Restaurant,American Restaurant,Arcade,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,...,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Adams Morgan,0.0,0.010000,0.00000,0.000000,0.0,0.0,0.020000,0.010000,0.01,...,0.02,0.00,0.00,0.0,0.0,0.0,0.00,0.010000,0.010000,0.020000
1,American University Park,0.0,0.000000,0.00000,0.012195,0.0,0.0,0.000000,0.012195,0.00,...,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.036585,0.000000,0.000000
2,Anacostia,0.0,0.000000,0.00000,0.176471,0.0,0.0,0.058824,0.000000,0.00,...,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.000000,0.000000,0.000000
3,Anacostia Naval Station - Boiling Air Force Base,0.0,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.00,...,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.000000,0.000000,0.000000
4,Arboretum,0.0,0.000000,0.02439,0.048780,0.0,0.0,0.000000,0.000000,0.00,...,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,West End,0.0,0.000000,0.00000,0.030000,0.0,0.0,0.000000,0.010000,0.00,...,0.00,0.02,0.00,0.0,0.0,0.0,0.00,0.010000,0.000000,0.000000
131,Woodland-Normanstone Terrace,0.0,0.015625,0.00000,0.031250,0.0,0.0,0.000000,0.000000,0.00,...,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.000000,0.046875,0.078125
132,Woodlands,0.0,0.000000,0.00000,0.000000,0.0,0.0,0.000000,0.000000,0.00,...,0.00,0.00,0.00,0.0,0.1,0.0,0.00,0.000000,0.000000,0.000000
133,Woodley Park,0.0,0.010000,0.00000,0.020000,0.0,0.0,0.000000,0.000000,0.01,...,0.00,0.00,0.01,0.0,0.0,0.0,0.01,0.000000,0.050000,0.120000


In [27]:
# Define a function to array the most common venues in each neighborhood

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
# Set parameters and generate the "top ten" table by interating the function

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = dc_grouped['Neighborhood']

for ind in np.arange(dc_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dc_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adams Morgan,Spa,Coffee Shop,Park,Gym / Fitness Center,Grocery Store,Liquor Store,Cocktail Bar,Ice Cream Shop,New American Restaurant,Diner
1,American University Park,Coffee Shop,Bank,Pizza Place,Convenience Store,Gym / Fitness Center,Furniture / Home Store,Yoga Studio,Dance Studio,Pharmacy,Fast Food Restaurant
2,Anacostia,American Restaurant,History Museum,Spa,Boutique,Bank,Outdoor Sculpture,Theater,Café,Comfort Food Restaurant,Sandwich Place
3,Anacostia Naval Station - Boiling Air Force Base,Donut Shop,Coffee Shop,River,Sandwich Place,Supermarket,Gas Station,Shop & Service,Doctor's Office,Soccer Field,Bowling Alley
4,Arboretum,Garden,Liquor Store,Gas Station,American Restaurant,Brewery,Nightclub,Concert Hall,Fast Food Restaurant,Strip Club,Storage Facility


In [29]:
# Run k-means to identify possible differences in access to neighborhood amenities

#set number of clusters
kclusters = 3

dc_grouped_clustering = dc_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dc_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 2, 2, 2, 1, 2, 2, 0, 0], dtype=int32)

In [30]:
# Add clustering labels
neighborhoods_venues_sorted.insert(0, 'Group', kmeans.labels_)

dc_merged = neighborhoods

# merge to add latitude/longitude for each neighborhood
dc_merged = dc_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

dc_merged.head() 

Unnamed: 0,Neighborhood,Latitude,Longitude,Group,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Lincoln Heights,38.89386,-76.924675,0,Convenience Store,Chinese Restaurant,Food,Liquor Store,Café,Sandwich Place,Building,Park,Construction & Landscaping,Flower Shop
1,Kenilworth,38.91293,-76.940925,0,Border Crossing,Recreation Center,Basketball Court,Coffee Shop,Chinese Restaurant,Park,Liquor Store,Playground,Fish & Chips Shop,Filipino Restaurant
2,Bellevue,38.820993,-77.013458,0,Sandwich Place,Convenience Store,Optical Shop,Gym,Gym / Fitness Center,Discount Store,Video Game Store,Pharmacy,Intersection,Food Truck
3,Kalorama,38.916362,-77.051122,2,Park,Ice Cream Shop,Cocktail Bar,Dessert Shop,Bagel Shop,Coffee Shop,Clothing Store,Ethiopian Restaurant,Massage Studio,Café
4,Barnaby Woods,38.974894,-77.057115,1,Trail,Gym / Fitness Center,Park,Playground,BBQ Joint,Zoo Exhibit,Dumpling Restaurant,Electronics Store,Ethiopian Restaurant,Event Space


In [31]:
# Create map

map_clusters = folium.Map(location=[38.900497, -77.007507], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dc_merged['Latitude'], dc_merged['Longitude'], dc_merged['Neighborhood'], dc_merged['Group']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [32]:
group_0 = dc_merged.loc[dc_merged['Group'] == 0, dc_merged.columns[[0] + list(range(4, dc_merged.shape[1]))]]
group_0

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Lincoln Heights,Convenience Store,Chinese Restaurant,Food,Liquor Store,Café,Sandwich Place,Building,Park,Construction & Landscaping,Flower Shop
1,Kenilworth,Border Crossing,Recreation Center,Basketball Court,Coffee Shop,Chinese Restaurant,Park,Liquor Store,Playground,Fish & Chips Shop,Filipino Restaurant
2,Bellevue,Sandwich Place,Convenience Store,Optical Shop,Gym,Gym / Fitness Center,Discount Store,Video Game Store,Pharmacy,Intersection,Food Truck
8,Fort Davis,Recreation Center,BBQ Joint,Bike Rental / Bike Share,Convenience Store,Wings Joint,Chinese Restaurant,Boat or Ferry,Pizza Place,Historic Site,Shopping Mall
9,Benning,Convenience Store,Donut Shop,Cosmetics Shop,Park,Gas Station,Metro Station,Sandwich Place,Market,Parking,Shipping Store
11,Potomac Heights,Trail,Convenience Store,Intersection,American Restaurant,Juice Bar,Bakery,Border Crossing,Filipino Restaurant,Ethiopian Restaurant,Event Space
18,Benning Ridge,Liquor Store,Convenience Store,Burger Joint,Chinese Restaurant,Scenic Lookout,Food & Drink Shop,Park,Shop & Service,Gas Station,Fast Food Restaurant
20,Civic Betterment,Chinese Restaurant,Convenience Store,Liquor Store,Fast Food Restaurant,Caribbean Restaurant,Spa,Seafood Restaurant,Other Repair Shop,Gas Station,Burger Joint
25,Congress Heights,Liquor Store,Bar,Tennis Court,Convenience Store,American Restaurant,Deli / Bodega,Art Gallery,Coffee Shop,Baseball Field,Road
30,Mahaning Heights,Park,Cosmetics Shop,Convenience Store,Gas Station,Sandwich Place,Breakfast Spot,Seafood Restaurant,Donut Shop,Metro Station,Snack Place


In [33]:
# Determine the most common category of venue in this group

most_popular = group_0.groupby('1st Most Common Venue').count()
most_popular.sort_values(by = ['Neighborhood'], inplace = True, ascending = False)
most_common = most_popular['Neighborhood']
most_common.head()

1st Most Common Venue
Liquor Store          10
Convenience Store      8
Park                   4
Chinese Restaurant     3
Intersection           3
Name: Neighborhood, dtype: int64

In [34]:
# Determine the second most common category of venue in this group

no_2 = group_0.groupby('2nd Most Common Venue').count()
no_2.sort_values(by = ['Neighborhood'], inplace = True, ascending = False)
second_most_common = no_2['Neighborhood']
second_most_common.head()

2nd Most Common Venue
Convenience Store       15
Liquor Store             4
Intersection             3
BBQ Joint                2
Fast Food Restaurant     2
Name: Neighborhood, dtype: int64

In [35]:
# Determine the third most common category of venue in this group

no_3 = group_0.groupby('3rd Most Common Venue').count()
no_3.sort_values(by = ['Neighborhood'], inplace = True, ascending = False)
third_most_common = no_3['Neighborhood']
third_most_common.head()

3rd Most Common Venue
Convenience Store     6
Intersection          5
Liquor Store          3
Chinese Restaurant    3
Sandwich Place        3
Name: Neighborhood, dtype: int64

In [36]:
group_1 = dc_merged.loc[dc_merged['Group'] == 1, dc_merged.columns[[0] + list(range(4, dc_merged.shape[1]))]]
group_1


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Barnaby Woods,Trail,Gym / Fitness Center,Park,Playground,BBQ Joint,Zoo Exhibit,Dumpling Restaurant,Electronics Store,Ethiopian Restaurant,Event Space
37,Colonial Village,Trail,Gym,Food Service,Food Court,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Event Space,Exhibit
107,Hawthorne,Trail,Construction & Landscaping,Gym / Fitness Center,Business Service,Park,Zoo Exhibit,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Exhibit


In [37]:
# Determine the most common category of venue in this group

most_popular = group_1.groupby('1st Most Common Venue').count()
most_popular.sort_values(by = ['Neighborhood'], inplace = True, ascending = False)
most_common = most_popular['Neighborhood']
most_common.head()

1st Most Common Venue
Trail    3
Name: Neighborhood, dtype: int64

In [38]:
# Determine the second most common category of venue in this group

no_2 = group_1.groupby('2nd Most Common Venue').count()
no_2.sort_values(by = ['Neighborhood'], inplace = True, ascending = False)
second_most_common = no_2['Neighborhood']
second_most_common.head()

2nd Most Common Venue
Construction & Landscaping    1
Gym                           1
Gym / Fitness Center          1
Name: Neighborhood, dtype: int64

In [39]:
# Determine the third most common category of venue in this group

no_3 = group_1.groupby('3rd Most Common Venue').count()
no_3.sort_values(by = ['Neighborhood'], inplace = True, ascending = False)
third_most_common = no_3['Neighborhood']
third_most_common.head()

3rd Most Common Venue
Food Service            1
Gym / Fitness Center    1
Park                    1
Name: Neighborhood, dtype: int64

In [40]:
group_2 = dc_merged.loc[dc_merged['Group'] == 2, dc_merged.columns[[0] + list(range(4, dc_merged.shape[1]))]]
group_2

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Kalorama,Park,Ice Cream Shop,Cocktail Bar,Dessert Shop,Bagel Shop,Coffee Shop,Clothing Store,Ethiopian Restaurant,Massage Studio,Café
5,Gallaudet,Coffee Shop,Liquor Store,Pizza Place,Taco Place,Gourmet Shop,Ice Cream Shop,American Restaurant,Convenience Store,Hotel,Italian Restaurant
6,Pleasant Plains,Coffee Shop,Sandwich Place,Bar,Cocktail Bar,Gay Bar,Pizza Place,Café,Southern / Soul Food Restaurant,Bus Stop,Ethiopian Restaurant
7,National Mall - West Potomac Park,Monument / Landmark,History Museum,Garden,Exhibit,Coffee Shop,Gift Shop,Art Museum,Café,Sculpture Garden,Sandwich Place
10,Brightwood,Park,Fast Food Restaurant,Pizza Place,Golf Course,Discount Store,Thrift / Vintage Store,National Park,Chinese Restaurant,Gas Station,Trail
...,...,...,...,...,...,...,...,...,...,...,...
126,Woodland-Normanstone Terrace,Zoo Exhibit,Zoo,Gym,Hotel Bar,Hotel,Coffee Shop,Sushi Restaurant,Park,Ice Cream Shop,Italian Restaurant
127,Park View,Coffee Shop,Bar,Deli / Bodega,Pizza Place,Bakery,Southern / Soul Food Restaurant,Beer Garden,Theater,Grocery Store,Food Truck
131,Logan Circle,Hotel,Coffee Shop,American Restaurant,Gym / Fitness Center,Grocery Store,New American Restaurant,Ice Cream Shop,Wine Bar,Cocktail Bar,Beer Garden
132,Lady Bird Johnson Park,Sculpture Garden,Monument / Landmark,Park,Tourist Information Center,Trail,Baseball Field,Harbor / Marina,Scenic Lookout,Sandwich Place,Fast Food Restaurant


In [41]:
# Determine the most common category of venue in this group

most_popular = group_2.groupby('1st Most Common Venue').count()
most_popular.sort_values(by = ['Neighborhood'], inplace = True, ascending = False)
most_common = most_popular['Neighborhood']
most_common.head()

1st Most Common Venue
Coffee Shop       16
Park               8
Hotel              7
Sandwich Place     6
Liquor Store       6
Name: Neighborhood, dtype: int64

In [42]:
# Determine the second most common category of venue in this group

no_2 = group_2.groupby('2nd Most Common Venue').count()
no_2.sort_values(by = ['Neighborhood'], inplace = True, ascending = False)
second_most_common = no_2['Neighborhood']
second_most_common.head()

2nd Most Common Venue
Coffee Shop            11
Pizza Place             6
American Restaurant     5
Sandwich Place          5
Bus Stop                4
Name: Neighborhood, dtype: int64

In [43]:
# Determine the third most common category of venue in this group

no_3 = group_2.groupby('3rd Most Common Venue').count()
no_3.sort_values(by = ['Neighborhood'], inplace = True, ascending = False)
third_most_common = no_3['Neighborhood']
third_most_common.head()

3rd Most Common Venue
Pizza Place            9
Park                   9
Hotel                  6
American Restaurant    5
Bar                    5
Name: Neighborhood, dtype: int64

In [44]:
# Download data to excel for more detailed analysis

dc_merged.to_excel(r'neighborhood_groups.xlsx', index = False)