In [4]:
import json
import os
import urllib

import folium
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

import config

### 1 Data Collection

#### 1.1 First we scrape Wikipedia for a list of neighborhoods from each city near Richmond, VA.

In [6]:
# Download the Wikipedia list of Richmond neighborhoods and parse its HTML.
# The timeout keeps the cell from hanging forever, and raise_for_status()
# stops an HTTP error page from being parsed as if it were the article.
wiki_response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_neighborhoods_in_Richmond,_Virginia',
    timeout=30,
)
wiki_response.raise_for_status()
page = wiki_response.text
soup = BeautifulSoup(page, 'html.parser')

In [7]:
# All <li> elements of the parsed page, in document order.
neigh_search = soup.find_all('li')

# Only items 7 through 111 are kept (presumably the neighborhood entries —
# TODO confirm against the live page); each gets a city/state suffix.
neighborhoods = [item.text + ", Richmond, VA" for item in neigh_search[7:112]]

#### 1.2 Next we geocode these neighborhoods for latitude and longitude data.

In [8]:
# Start from an empty frame; the geocoding results are stored under this name.
rva_neighborhoods = pd.DataFrame()

In [9]:
# Geocode every neighborhood name with the Google Geocoding API.
# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append() was removed in pandas 2.0 and copied the whole frame on
# every iteration.
geocoded_rows = []
for name in neighborhoods:
    # Passing the query via `params` lets requests handle the URL encoding.
    geocode_response = requests.get(
        'https://maps.googleapis.com/maps/api/geocode/json',
        params={'address': name, 'key': config.google_api_key},
        timeout=30,
    )
    geocode_response.raise_for_status()
    results = geocode_response.json()

    # An address with no match comes back with an empty `results` list;
    # report and skip it instead of failing with an IndexError.
    matches = results.get('results') or []
    if not matches:
        print('No geocoding result for {!r} (status: {})'.format(name, results.get('status')))
        continue

    lat = matches[0]['geometry']['location']['lat']
    lng = matches[0]['geometry']['location']['lng']
    geocoded_rows.append({'Neighborhood': name, 'Latitude': lat, 'Longitude': lng})

rva_neighborhoods = pd.DataFrame(geocoded_rows, columns=['Neighborhood', 'Latitude', 'Longitude'])

In [129]:
# Persist the geocoded neighborhoods (without the index column) next to the notebook.
rva_neighborhoods.to_csv(path_or_buf='rva_neighborhoods.csv', index=False)

In [126]:
# Mean latitude / longitude of all neighborhoods.
lat_center, lng_center = (
    rva_neighborhoods[column].mean() for column in ('Latitude', 'Longitude')
)

In [131]:
# Fixed-size figure holding a map centred on (lat_center, lng_center).
f = folium.Figure(width=650, height=450)
map_rva = folium.Map(location=[lat_center, lng_center], zoom_start=11)
map_rva.add_to(f)

# Appearance shared by every neighborhood marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per neighborhood, with the neighborhood name as its popup.
marker_columns = ['Latitude', 'Longitude', 'Neighborhood']
for marker_lat, marker_lng, hood in rva_neighborhoods[marker_columns].itertuples(index=False, name=None):
    folium.CircleMarker(
        [marker_lat, marker_lng],
        popup=folium.Popup(str(hood), parse_html=True),
        **marker_style
    ).add_to(map_rva)

map_rva

#### 1.3 Now we use the Foursquare API to find venues in each neighborhood.

In [2]:
def redact(value):
    """Return a placeholder for a credential so the real value is never displayed.

    Returns '<redacted>' for a non-empty value and '<missing>' for an empty one.
    """
    return '<redacted>' if value else '<missing>'


# Credentials are read from the environment so they are never stored in the
# notebook or in its saved output. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')

# Only report whether each credential is present, never its value.
print('Your credentials:')
print('CLIENT_ID: ' + redact(CLIENT_ID))
print('CLIENT_SECRET:' + redact(CLIENT_SECRET))

Your credentials:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [3]:
def getNearbyVenuesLatLng(neighborhood, latitudes, longitudes, radius):
    """Collect the Foursquare venues found around each neighborhood location.

    Parameters
    ----------
    neighborhood, latitudes, longitudes : iterables
        Walked in parallel with zip(); one "explore" request is sent per
        (name, latitude, longitude) triple.
    radius : number
        Forwarded unchanged as the `radius` query parameter (presumably
        metres — confirm against the Foursquare documentation).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was found. 'Venue Category' is
        None for a venue that carries no category.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an HTTP error status.

    Notes
    -----
    Reads the notebook-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for neigh, lat, lng in zip(neighborhood, latitudes, longitudes):

        # make the GET request; `params` lets requests do the URL encoding
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        # fail with a clear HTTP error instead of a KeyError further down
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for v in items:
            venue = v['venue']
            # some venues come back without any category
            categories = venue.get('categories') or []
            rows.append((
                neigh,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing `columns` here keeps the schema stable for an empty result
    return pd.DataFrame(rows, columns=columns)

In [9]:
# Church Hill's coordinates are pinned explicitly. Reading `lat`/`lng` here
# would pick up whatever the geocoding loop assigned last, i.e. the final
# neighborhood in the list rather than Church Hill.
CHURCH_HILL_LAT, CHURCH_HILL_LNG = 37.53331, -77.415351

df = pd.DataFrame({
    'Neighborhood': ['Church Hill'],
    'Latitude': [CHURCH_HILL_LAT],
    'Longitude': [CHURCH_HILL_LNG],
})

# Venues within a radius of 700 around that single point.
church_hill_venues = getNearbyVenuesLatLng(df['Neighborhood'], df['Latitude'], df['Longitude'], 700)
church_hill_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Church Hill,37.53331,-77.415351,Sub Rosa Bakery,37.534915,-77.415869,Bakery
1,Church Hill,37.53331,-77.415351,The Roosevelt,37.535085,-77.415411,Southern / Soul Food Restaurant
2,Church Hill,37.53331,-77.415351,Dutch & Co.,37.531432,-77.416205,New American Restaurant
3,Church Hill,37.53331,-77.415351,Liberty Public House,37.533115,-77.417505,New American Restaurant
4,Church Hill,37.53331,-77.415351,Well-Made Pastry Alliance,37.531318,-77.416167,Bakery


In [6]:
# Centre the map on the coordinates stored in `df` (the ones the venue query
# used) instead of on loop variables left over from an earlier cell.
church_hill_center = [float(df['Latitude'].iloc[0]), float(df['Longitude'].iloc[0])]

f = folium.Figure(width=650, height=450)
map_church_hill = folium.Map(location=church_hill_center, zoom_start=14).add_to(f)

# add one marker per venue, with the venue name as its popup
for x, y, name in zip(church_hill_venues['Venue Latitude'],
                      church_hill_venues['Venue Longitude'],
                      church_hill_venues['Venue']):
    label = folium.Popup('{}'.format(name), parse_html=True)
    folium.CircleMarker(
        [x, y],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_church_hill)

map_church_hill

In [7]:
# Number of venues per category, largest count first.
venues_by_category = church_hill_venues.groupby('Venue Category')['Venue']
venues_by_category.count().sort_values(ascending=False)

Venue Category
Bakery                                      3
Grocery Store                               2
BBQ Joint                                   2
Scenic Lookout                              2
Pub                                         2
Park                                        2
New American Restaurant                     2
Video Store                                 1
Bridal Shop                                 1
Café                                        1
Coffee Shop                                 1
Deli / Bodega                               1
French Restaurant                           1
German Restaurant                           1
Mexican Restaurant                          1
Historic Site                               1
Liquor Store                                1
Thai Restaurant                             1
Movie Theater                               1
Playground                                  1
Residential Building (Apartment / Condo)    1
Sandwich Place     

### 2 Data Preparation

#### 2.1 Dummy variables are created for each venue category.

#### 2.2 We group each city by neighborhood, taking the mean of each category. This gives the frequency of each category's occurrence, from 0 to 1.

#### 2.3 We cluster to determine the most similar neighborhoods.