In [5]:
import pandas as pd
import numpy as np

import requests
from pandas.io.json import json_normalize
import os

from geopy.geocoders import Nominatim

In [6]:
# Load 'kc_house_data.csv' (path relative to the working directory) into a DataFrame.
df_districts = pd.read_csv('kc_house_data.csv')

In [7]:
# Lift the column cap so wide frames are rendered in full, then preview the data.
pd.options.display.max_columns = None
df_districts.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,Region
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,3,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650,Seattle
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,3,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639,Seattle
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,3,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062,Kenmore
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,5,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000,Seattle
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,3,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503,Sammamish


In [8]:
# Normalise the one region name that is missing a space.
# The result is assigned back to the column rather than using `inplace=True` on
# the `df_districts.Region` accessor: that form is a chained in-place update,
# which pandas warns about and which does not modify the parent frame once
# Copy-on-Write is active.
df_districts['Region'] = df_districts['Region'].replace({'BlackDiamond': 'Black Diamond'})

In [9]:
# Reduce the table to a single 'Region' column holding each distinct region once
# (order of first appearance is kept by `unique`).
df_districts = pd.DataFrame({'Region': df_districts.Region.unique()})

**I am using Geopy to acquire the center of each region (simply averaging the house coordinates could skew a center to one side because of outlier points in some regions):**

In [10]:
# Geocode every region name to a (latitude, longitude) pair with Nominatim.
# The geocoder client is created once instead of once per region.
# NOTE(review): Nominatim's usage policy asks for an application-specific
# user agent and at most one request per second - confirm 'http' is acceptable.
geolocator = Nominatim(user_agent='http')

lat, lng = [], []
# `df_districts` already holds one row per region, so iterating the column
# directly keeps `lat`/`lng` aligned with the frame's rows.
for d in df_districts['Region']:
    address = d + ', Washington'
    location = geolocator.geocode(address)
    if location is None:
        # geocode() returns None when nothing matches; fail with the offending
        # address instead of an opaque AttributeError on `.latitude`.
        raise ValueError('Nominatim returned no result for {!r}'.format(address))
    latitude = location.latitude
    longitude = location.longitude
    lat.append(latitude)
    lng.append(longitude)

In [11]:
# Attach the geocoded coordinates (one entry per region row) and show the result.
df_districts['Latitude'] = lat
df_districts['Longitude'] = lng
df_districts

Unnamed: 0,Region,Latitude,Longitude
0,Seattle,47.603832,-122.330062
1,Kenmore,47.75732,-122.244015
2,Sammamish,47.608844,-122.042307
3,Redmond,47.669414,-122.123877
4,Federal Way,47.313494,-122.33931
5,Maple Valley,47.366423,-122.043713
6,Bellevue,47.614422,-122.192337
7,Duvall,47.742322,-121.985678
8,Auburn,47.307537,-122.230181
9,Mercer Island,47.560207,-122.220142


**Foursquare API**

In [12]:
# Foursquare API settings.
# Credentials are taken from the environment so real keys never have to be
# saved inside the notebook; the '...' fallbacks are the redacted placeholders
# this notebook was published with.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '...')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '...')
VERSION = '20180605'  # sent as the `v` query parameter of every request

First, I am going to focus on one district to show the mechanism. Then I will write a function that loops over all 24 districts and finds venues for each of them.

In [13]:
# Pull the geocoded centre of Seattle out of the regions table.
# Note: `lat` and `lng` are rebound here from the per-region lists to scalars.
is_seattle = df_districts['Region'] == 'Seattle'
lat_lng = df_districts.loc[is_seattle, ['Latitude', 'Longitude']]
lat, lng = lat_lng.iloc[0, 0], lat_lng.iloc[0, 1]
print(lat, lng)

47.6038321 -122.3300624


In [14]:
# Search parameters for the single-district demonstration.
# `limit` is also read by `getNearbyVenues` further down when no limit is passed.
radius = 10000
limit = 200

# Build the venues/explore request for the Seattle centre point.
explore_url_template = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'
url_seattle = explore_url_template.format(
    CLIENT_ID, CLIENT_SECRET, lat, lng, VERSION, radius, limit
)

# Fetch and decode the JSON payload.
response_seattle = requests.get(url_seattle)
results = response_seattle.json()

Function for extracting the category name from a venue in the JSON response:

In [41]:
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    Parameters
    ----------
    row : mapping-like (dict or pandas Series)
        Must provide the categories under the key 'categories' or, failing
        that, under 'venue.categories'.

    Returns
    -------
    str or None
        The 'name' of the first category, or None when the row carries no
        usable category list (empty list, None, NaN, or any non-list value).

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present in `row`.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be hidden by a bare `except`.
        categories_list = row['venue.categories']

    # A missing value (None/NaN) has no len(); treat anything that is not a
    # non-empty list/tuple as "no category" instead of raising TypeError.
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [50]:
# Flatten the nested venue records of the first result group into a table.
venues = results['response']['groups'][0]['items']
# `pd.json_normalize` is the public entry point (pandas >= 1.0); the older
# `pandas.io.json.json_normalize` import path is deprecated.
venues_seattlle = pd.json_normalize(venues)
venues_seattlle.head(2)

Unnamed: 0,reasons.count,reasons.items,referralId,venue.categories,venue.delivery.id,venue.delivery.provider.icon.name,venue.delivery.provider.icon.prefix,venue.delivery.provider.icon.sizes,venue.delivery.provider.name,venue.delivery.url,venue.id,venue.location.address,venue.location.cc,venue.location.city,venue.location.country,venue.location.crossStreet,venue.location.distance,venue.location.formattedAddress,venue.location.labeledLatLngs,venue.location.lat,venue.location.lng,venue.location.neighborhood,venue.location.postalCode,venue.location.state,venue.name,venue.photos.count,venue.photos.groups,venue.venuePage.id
0,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-451c049bf964a520803a1fe3-0,"[{'id': '52e81612bcbc57f1066b7a33', 'name': 'S...",,,,,,,451c049bf964a520803a1fe3,701 5th Ave Fl 75,US,Seattle,United States,,81,"[701 5th Ave Fl 75, Seattle, WA 98104, United ...","[{'label': 'display', 'lat': 47.60450651693834...",47.604507,-122.330484,,98104,WA,Columbia Tower Club,0,[],
1,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-5762cc9ccd1085a720b1433e-1,"[{'id': '4bf58dd8d48988d143941735', 'name': 'B...",,,,,,,5762cc9ccd1085a720b1433e,621 3rd Ave,US,Seattle,United States,James St & Cherry St,160,"[621 3rd Ave (James St & Cherry St), Seattle, ...","[{'label': 'display', 'lat': 47.603237, 'lng':...",47.603237,-122.33201,,98104,WA,Biscuit B*tch,0,[],


In [52]:
# Keep only the venue name, its category list and its coordinates.
columns = [
    'venue.name',
    'venue.categories',
    'venue.location.lat',
    'venue.location.lng',
]
venues_seattlle = venues_seattlle.loc[:, columns]
venues_seattlle.head(2)

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Columbia Tower Club,"[{'id': '52e81612bcbc57f1066b7a33', 'name': 'S...",47.604507,-122.330484
1,Biscuit B*tch,"[{'id': '4bf58dd8d48988d143941735', 'name': 'B...",47.603237,-122.33201


To simplify the dataframe, I will apply the function defined above and then shorten the column names.

In [53]:
# Replace each category list by the name of its first entry ...
category_names = venues_seattlle.apply(get_category_type, axis=1)
venues_seattlle['venue.categories'] = category_names

# ... and keep only the last dotted component of every column label
# (e.g. 'venue.location.lat' -> 'lat').
venues_seattlle.columns = [label.rsplit('.', 1)[-1] for label in venues_seattlle.columns]
venues_seattlle.head(5)

Unnamed: 0,name,categories,lat,lng
0,Columbia Tower Club,Social Club,47.604507,-122.330484
1,Biscuit B*tch,Breakfast Spot,47.603237,-122.33201
2,Tat's Delicatessen,Sandwich Place,47.601901,-122.332423
3,Tsukushinbo,Japanese Restaurant,47.599963,-122.326878
4,Metropolitan Grill,Steakhouse,47.604617,-122.33428


In [18]:
# Count how many Seattle venues fall into each category.
venues_seattlle['categories'].value_counts()

Coffee Shop                      10
Hotel                             7
Bakery                            5
Vietnamese Restaurant             5
Seafood Restaurant                4
Italian Restaurant                3
Sandwich Place                    3
Dumpling Restaurant               3
Sushi Restaurant                  3
Deli / Bodega                     2
Café                              2
Chinese Restaurant                2
American Restaurant               2
Noodle House                      2
Cheese Shop                       2
Performing Arts Venue             2
Japanese Restaurant               2
Lounge                            1
Snack Place                       1
Park                              1
Herbs & Spices Store              1
Fish Market                       1
Gym                               1
Supermarket                       1
French Restaurant                 1
Vegetarian / Vegan Restaurant     1
Theater                           1
Bar                         

**Function for extracting venues for all the regions:**

In [54]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=None):
    """Query Foursquare's venues/explore endpoint around each given point.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        District name and centre coordinates; consumed in lockstep with zip().
    radius : int, default 500
        Value sent as the `radius` query parameter.
    limit : int, optional
        Value sent as the `limit` query parameter. When omitted, the
        notebook-level variable `limit` is used if it exists (the original
        behaviour), otherwise 100.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'District',
        'District Latitude', 'District Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude', 'Venue Category'. 'Venue Category' is None for
        venues that carry no category. The frame is empty (but still has all
        columns) when no venue is found.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an error status.
    """
    column_names = ['District',
                    'District Latitude',
                    'District Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    if limit is None:
        # Backward compatibility: earlier versions silently read the global
        # `limit` defined in another cell.
        limit = globals().get('limit', 100)

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):

        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            limit)

        # A timeout keeps one stalled request from hanging the notebook, and
        # raise_for_status surfaces API errors (bad credentials, quota) directly
        # rather than as a KeyError on the payload below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        for v in results:
            venue = v['venue']
            # Some venues come back without any category; keep them with None
            # instead of failing on categories[0].
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing `columns=` at construction keeps the schema even with zero rows,
    # where assigning `.columns` afterwards raises a length-mismatch error.
    nearby_venues = pd.DataFrame(rows, columns=column_names)

    return(nearby_venues)

In [73]:
# Collect nearby venues for every region centre (default search radius).
venues_washington = getNearbyVenues(
    df_districts['Region'],
    df_districts['Latitude'],
    df_districts['Longitude'],
)

In [74]:
# Show ten random venues; the fixed seed makes the displayed sample
# reproducible across Restart & Run All.
venues_washington.sample(10, random_state=42)

Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
354,Renton,47.479908,-122.20345,Uptown Glassworks,47.480057,-122.203965,Art Gallery
292,Auburn,47.307537,-122.230181,Sushido,47.307719,-122.228011,Sushi Restaurant
270,Duvall,47.742322,-121.985678,The Duvall Coffeehouse,47.741218,-121.985982,Coffee Shop
473,Woodinville,47.754583,-122.15889,Mi Tierra,47.754946,-122.160623,Mexican Restaurant
48,Seattle,47.603832,-122.330062,Altstadt,47.60048,-122.334399,German Restaurant
340,Issaquah,47.534878,-122.043297,Issaquah Garage Door Repair,47.531768,-122.042522,Other Repair Shop
168,Redmond,47.669414,-122.123877,Prime Steakhouse,47.673347,-122.122127,Steakhouse
455,North Bend,47.495946,-121.785154,Redbox,47.49337,-121.780624,Video Store
565,Bothell,47.75987,-122.206829,Countryside Donut House,47.760398,-122.206314,Donut Shop
159,Redmond,47.669414,-122.123877,Chick'n Fix,47.672675,-122.122648,Food Truck


In [75]:
# Number of venues found per district, largest first.
venue_counts = venues_washington.groupby('District')[['Venue']].count()
venue_counts.sort_values(by='Venue', ascending=False)

Unnamed: 0_level_0,Venue
District,Unnamed: 1_level_1
Seattle,100
Bellevue,70
Redmond,70
Renton,48
Woodinville,45
Bothell,41
Vashon,37
North Bend,28
Auburn,27
Duvall,23


In [69]:
# Rename the 'District' column to 'Region' on the existing frame (in place).
venues_washington.rename(columns={'District':'Region'}, inplace=True)

In [70]:
# Write the venues table to 'venues_washington.csv' in the working directory.
# NOTE(review): no `index=` argument is passed, so pandas' default index
# handling applies - confirm whether readers of this file expect an index column.
venues_washington.to_csv('venues_washington.csv')