In [4]:
import requests
import os
import pandas as pd

In [6]:
# Load the bike-station table and preview its first rows.
df = pd.read_csv('data.csv')
df.head()

Unnamed: 0,name,latitude,longitude,free_bikes,empty_slots,total_bike_cap
0,East End Library,37.539779,-77.41146,0,6,6
1,Sydney Park,37.546968,-77.456599,0,16,16
2,Broad & Harrison,37.551472,-77.451937,0,19,19
3,Broad & Lombardy,37.553716,-77.455681,0,23,23
4,Broad Rock Library,37.483291,-77.47939,0,10,10


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [11]:
# Query the Foursquare Places search endpoint once per bike station and
# collect every returned place, tagged with the station it was found near.
api_key = os.environ['foursquareapi']
radius = str(1000)  # search radius around each station, in metres
# NOTE(review): presumably the Foursquare category IDs for offices and bars --
# confirm against the Foursquare category taxonomy.
categories = '11124,13003'
headers = {"Accept": "application/json"}
headers['Authorization'] = api_key

foursquare_search_url = "https://api.foursquare.com/v3/places/search"
foursquare_frames = []  # one frame per station; concatenated once after the loop

for index, row in df.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    # Let requests build and URL-encode the query string.
    params = {
        'll': f"{latitude},{longitude}",
        'radius': radius,
        'categories': categories,
        'fields': 'name,rating,location,popularity,distance,categories',
    }
    # A timeout keeps one stalled connection from hanging the whole cell.
    result = requests.get(foursquare_search_url, headers=headers, params=params, timeout=30)
    if result.status_code != 200:
        # Report the failure and move on instead of parsing an error body.
        print(f"Foursquare request for {row['name']} failed with status {result.status_code}")
        continue
    data = result.json()
    result_df = pd.json_normalize(data.get('results', []))
    if not result_df.empty:
        result_df['station'] = row['name']
        foursquare_frames.append(result_df)

# Concatenate once: calling pd.concat inside the loop re-copies all earlier rows
# on every iteration.
foursquaredata = pd.concat(foursquare_frames, ignore_index=True) if foursquare_frames else pd.DataFrame()

print(foursquaredata['station'].unique())  # Check unique stations in the final DataFrame


['East End Library' 'Sydney Park' 'Broad & Harrison' 'Broad & Lombardy'
 'Broad Rock Library' 'Abner Clay Park' 'Center Stage'
 'Main Street Station' 'Carter Jones Park' 'Kanawha Plaza' 'Monroe Park'
 'Battery Park' 'City Hall' 'Blackwell' 'Fairmont' 'Science Museum'
 'Browns Island' 'Jefferson Ave' 'Chimborazo Park' 'Canal Walk'
 'Pleasants Park-Oregon Hill' 'North Avenue Library'
 'Petronius Jones Park-Randolph' 'Main Library']


In [235]:
# Distinct station names that have at least one Foursquare result.
foursquaredata.loc[:, 'station'].unique()

array(['East End Library', 'Sydney Park', 'Broad & Harrison',
       'Broad & Lombardy', 'Broad Rock Library', 'Abner Clay Park',
       'Center Stage', 'Main Street Station', 'Carter Jones Park',
       'Kanawha Plaza', 'Monroe Park', 'Battery Park', 'City Hall',
       'Blackwell', 'Fairmont', 'Science Museum', 'Browns Island',
       'Jefferson Ave', 'Chimborazo Park', 'Canal Walk',
       'Pleasants Park-Oregon Hill', 'North Avenue Library',
       'Petronius Jones Park-Randolph', 'Main Library'], dtype=object)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc). 

In [22]:
# Number of Foursquare rows without a rating.
foursquaredata['rating'].isna().sum()

56

There are 56 null values in the Foursquare ratings, but I am keeping them for now because I want to compare them with the Yelp data in the next stage. These places may be offices, in which case it makes sense that no one has rated them yet.

In [12]:
# Keep only the columns used downstream and reduce each row's list of category
# records to a list of category names.
# .assign() returns a new frame, so nothing is written into a selection of
# foursquaredata (avoids SettingWithCopyWarning) and re-running the cell always
# starts from the raw data.
foursquareparsed = foursquaredata[
    ['station', 'distance', 'name', 'location.address', 'rating', 'popularity', 'categories']
].assign(
    categories=lambda frame: frame['categories'].apply(
        lambda category_records: [cat['name'] for cat in category_records]
    )
)

In [13]:
def _label_poi(category_names):
    """Collapse a place's category names into a single label.

    Returns 'bar' when any name contains 'bar', 'pub' or 'restaurant'
    (case-insensitive substring match, so e.g. 'Barbershop' also matches),
    otherwise 'office'.

    A value that is already a string is returned unchanged. Without this guard,
    re-running the cell would iterate over the characters of the previous label
    and turn every row into 'office'.
    """
    if isinstance(category_names, str):
        return category_names
    keywords = ('bar', 'pub', 'restaurant')
    matches_keyword = any(
        keyword in category.lower()
        for category in category_names
        for keyword in keywords
    )
    return 'bar' if matches_keyword else 'office'


# .assign() builds a new frame instead of writing into the existing one.
foursquareparsed = foursquareparsed.assign(
    categories=foursquareparsed['categories'].apply(_label_poi)
)

Identifying and then dealing with duplicates.

In [14]:
# Count rows per (name, station) pair; a count above 1 means the same place was
# returned more than once for one station.
namecount = foursquareparsed.groupby(['name', 'station']).size().reset_index(name='count')
foursq_repeated_pairs = namecount[namecount['count'] > 1].sort_values(by='count', ascending=False)

# De-duplicate on name alone: only the first row for each place name is kept,
# so a place near several stations stays attached to the first one listed.
foursquareparsed = foursquareparsed.drop_duplicates(subset='name')

# Only the last expression of a cell is displayed, so the report goes last;
# previously it was computed and thrown away.
foursq_repeated_pairs


Results into CSV for next steps

In [None]:
# Write foursquareparsed to foursqdata.csv without the row index.
foursquareparsed.to_csv(path_or_buf='foursqdata.csv', index=False)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [7]:
# Query the Yelp business search endpoint once per bike station and collect
# every returned business, tagged with the station it was found near.
yelp_api_key = os.environ['yelpapi']
radius = str(1000)  # search radius around each station, in metres
categories = 'bars,offices'
headers = {"Accept": "application/json"}
headers['Authorization'] = f"Bearer {yelp_api_key}"

yelp_search_url = "https://api.yelp.com/v3/businesses/search"
yelpdataframes = []  # one frame per station; concatenated once after the loop
for index, row in df.iterrows():
    latitude = row['latitude']
    longitude = row['longitude']
    # Let requests build and URL-encode the query string.
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': radius,
        'categories': categories,
    }
    # A timeout keeps one stalled connection from hanging the whole cell.
    result = requests.get(yelp_search_url, headers=headers, params=params, timeout=30)
    print(f"Status: {result.status_code}")
    if result.status_code != 200:
        # Skip failed requests rather than parsing an error body.
        continue
    data = result.json()
    result_df = pd.json_normalize(data.get('businesses', []))
    if not result_df.empty:
        result_df['station'] = row['name']
        yelpdataframes.append(result_df)
yelpdata = pd.concat(yelpdataframes, ignore_index=True) if yelpdataframes else pd.DataFrame()


Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200
Status: 200


  yelpdata = pd.concat(yelpdataframes, ignore_index=True) if yelpdataframes else pd.DataFrame()


Confirm that all stations are present, then parse the data and rename the address column to match the Foursquare table.

In [18]:
# Only the last expression of a cell is displayed, so the station list is
# printed explicitly; previously its result was discarded.
print(yelpdata['station'].unique())
# Column names available in the normalized Yelp response.
yelpdata.columns

Index(['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count',
       'categories', 'rating', 'transactions', 'price', 'phone',
       'display_phone', 'distance', 'business_hours', 'coordinates.latitude',
       'coordinates.longitude', 'location.address1', 'location.address2',
       'location.address3', 'location.city', 'location.zip_code',
       'location.country', 'location.state', 'location.display_address',
       'attributes.business_temp_closed', 'attributes.menu_url',
       'attributes.open24_hours', 'attributes.waitlist_reservation',
       'station'],
      dtype='object')

In [16]:
# Total number of Yelp rows collected across all stations.
yelpdata.shape[0]

352

In [20]:
# Number of Yelp rows without a rating.
yelpdata['rating'].isna().sum()

0

In [9]:
# Keep the columns shared with the Foursquare table and give the address column
# the same name it has there.
yelpparsed = (
    yelpdata
    .loc[:, ['station', 'distance', 'name', 'location.address1', 'rating']]
    .rename(columns={'location.address1': 'location.address'})
)


### Dealing with duplicates

In [10]:
# Count rows per (name, station) pair; a count above 1 means the same business
# was returned more than once for one station.
namecount = yelpparsed.groupby(['name', 'station']).size().reset_index(name='count')
yelp_repeated_pairs = namecount[namecount['count'] > 1].sort_values(by='count', ascending=False)

# De-duplicate on name alone: only the first row for each business name is
# kept, so a business near several stations stays attached to the first one listed.
yelpparsed = yelpparsed.drop_duplicates(subset='name')

# Only the last expression of a cell is displayed, so the report goes last;
# previously it was computed and thrown away.
yelp_repeated_pairs


Results into a CSV for next steps

In [None]:
# Write yelpparsed to yelpdata.csv without the row index.
yelpparsed.to_csv(path_or_buf='yelpdata.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Foursquare provided more complete data because it has clear documentation for its categories and filtering. Yelp's lack of category definitions means there is more uncertainty about what you are able to pull.

Get the top 10 restaurants according to their rating

Yelp top 10

In [24]:
# Ten highest-rated Yelp rows, re-indexed from 0.
yelptop10 = (
    yelpparsed
    .sort_values('rating', ascending=False, ignore_index=True)
    .iloc[:10]
)
yelptop10

Unnamed: 0,station,distance,name,location.address,rating
0,Sydney Park,421.689324,Trouvaille,203 N Lombardy St,5.0
1,Broad & Harrison,919.349422,Penny's Wine Shop,405 Brook Rd,4.8
2,Science Museum,941.927433,Beaucoup,111 N Robinson St,4.7
3,Sydney Park,366.955387,Celladora Wines,111B N Lombardy St,4.7
4,Battery Park,626.506167,Harlym Blue’Z,210 W Brookland Park Blvd,4.6
5,Browns Island,1007.119521,Basic City Beer,212 W 6th St,4.6
6,Abner Clay Park,808.654914,Lillie Pearl,418 E Grace St,4.5
7,Battery Park,570.268715,Fuzzy Cactus,221 Brookland Park Blvd,4.5
8,Sydney Park,496.416882,Heritage,1627 W Main St,4.5
9,Science Museum,963.244101,Parlay,3117 W Leigh St,4.4


Foursquare top 10

In [25]:
# Ten highest-rated Foursquare rows, re-indexed from 0 (sort_values puts
# missing ratings last by default).
foursqtop10 = (
    foursquareparsed
    .sort_values('rating', ascending=False, ignore_index=True)
    .iloc[:10]
)
foursqtop10

Unnamed: 0,station,distance,name,location.address,rating,popularity,categories
0,East End Library,773,Union Market,2306 Jefferson Ave,9.4,0.984966,bar
1,Jefferson Ave,443,Dutch & Co.,400 N 27th St,9.2,0.497622,bar
2,Science Museum,913,Hardywood Park Craft Brewery,2408 Ownby Ln,9.1,0.993189,bar
3,Jefferson Ave,595,23rd & Main,2300 E Main St,8.9,0.929377,bar
4,Science Museum,941,Commercial Taphouse & Grill,111 N Robinson St,8.8,0.931781,bar
5,Main Street Station,192,LuLu's,21 N 17th St,8.7,0.985245,bar
6,East End Library,912,Liberty Public House,418 N 25th St,8.7,0.975994,bar
7,Main Street Station,880,Patrick Henry's Pub & Grille,2300 E Broad St,8.6,0.983712,bar
8,Sydney Park,295,Beauvine Burger Concept,1501 W Main St,8.5,0.988468,bar
9,Main Street Station,552,Station 2,2016 E Main St,8.5,0.994478,bar
