In [9]:
!pip install requests
!pip install geopy
!pip install geopandas
!pip install ratelimiter
!pip install contextily









Collecting pyproj>=3.0.1
  Using cached pyproj-3.6.0-cp310-cp310-win_amd64.whl (5.7 MB)
Installing collected packages: pyproj
Successfully installed pyproj-3.6.0












In [4]:
import geopandas as gpd
from shapely.geometry import Polygon
import pandas as pd
import numpy as np
import json
import requests
from ratelimiter import RateLimiter
import plotly.express as px
import matplotlib.pyplot as plt
import contextily as cx
import time
from datetime import datetime
from IPython.display import clear_output
from IPython.display import JSON

# Foursquare API Venue Scrape
## Objective
This notebook will use the **Foursquare API** to find all venues in a geographic area and gather extended information (popularity, type, contact details, location etc.) for each venue:
- The Geographic area will be square shaped and defined using latitude and longitude coordinates. 
- The **Place Search** Foursquare API will be used to find the venues. 
- Type of venue was defined to look for only restraunts, nature-related placed, art museums and monuments. 
- The **Get Place Details** Foursquare API will be used to get extended data on each venue. 

The end result will be a csv file containing all venues found and their associated data. The total cost of the API calls for a big city is ~\\$80 and thus the free tier Sandbox membership (\\$200 of free API calls per month) comfortably covers the costs.  

## Approach
- **Venue Search** - Exhaustively search geographic area for venues. 
- **Get Venue Details** - Get extended venue information on each venue we found. 

## Venue Search Functions


In [35]:
with open('key.txt') as f:
    key = f.read()

### Define Place Search API Call

The foursquare API has a rate limit of 50 calls per second. We used the **ratelimiter** package to apply this rate limit to the function. 

The following API Paramaters were used to define the API call: 
- **ll** - comma seperated latitude and longitude value e.g. '5123,0.0324'
- **radius** - the radius (in meters) around the latitude / longitude value that will be included in the search.
- **categories** - commas separated list of category codes. The search will only return businesses within those categories. The categories can be found [here](https://developer.foursquare.com/docs/categories). We primarily searched for bars alongside some other categories. You can leave this blank to return all businesses within the search area. 
- **sort** - defines how to sort results. We sorted by distance i.e. closest to the latitude / longitude value first. 
- **limit** - maximum number of results returned. 50 is the max number Foursquare allows. 

In [6]:
# Base url string of the Place Search API call
search_url = "https://api.foursquare.com/v3/places/search"

# RateLimiter decorator, ensures the api_call function is called less 
# than 50 times per second, the Foursquare rate limit
@RateLimiter(max_calls=49, period=1)
def api_call(long_lat, radius, cats, key):
    """Perform a foursquare places api search and return response.
    
    Args:
        long_lat (string): The latitude and longitude coordinate of 
            the search in comma separated format e.g. '51.513882,-0.119478'. 
            The API will find venues within a radius of this point.   
        radius (string): The search radius (in meters) for the venue 
            search e.g. '800'. 
        cats (string): A comma separated list of Foursquare Category Codes
            e.g. '11131,11039'. The search will only return venues in these 
            categories. Leave as empty list e.g. '' if you want all categories 
            included in search. 
        key (string): Foursquare API key. 

    Returns:
        api response: Response from foursquare places api call. The call was
            defined by the latitude and longitude, radius and categories supplied
            to the function. A maximum of 50 venues will be contained in the response.
    """
    
    # Defines paramaters to add to the search_url. These define the Search.  
    params = dict(ll = long_lat,
                  radius = radius, 
                  categories = cats,
                  sort = 'DISTANCE',
                  limit = '50'
                 )
    # Defines headers required in Place Search API call
    headers = {
        "Accept": "application/json",
        "Authorization": key
    }

    return requests.request("GET", search_url, headers = headers, params = params)

Test Place Search API call to check it works as expected below. It should return a JSON file containing a list of results. 

In [4]:
# Perform Place Search API call
response = api_call('51.513882,-0.119478', '1000', '11131,11039', key)
# extract results from response as json file
results = json.loads(response.text)
# Visualise results
JSON(results)

<IPython.core.display.JSON object>

### Extract Data From Foursquare Response
The Place Search API returns a response that contains a list of up to 50 venues. The objective is to extract the venue data (name, location etc.) of each venue. To do this we do the following:
- Extract text from API response and convert to JSON file 
- Filter the JSON on 'results' - this gives us a list of the up to 50 venues, where each element in the list is a dictionary containing the data for one venue. 
- Pass the list of venue data to the *extract_venues* function, this extracts the data from each element of the list and returns the data as a list of lists (in a format that can be readily converted to a pandas dataframe). 

The extract_venues function is defined below: 

In [8]:
def extract_venues(venues_list):
    """Extracts desired data from venue_list and returns as list of lists
    
    Args:
        venue_list (list): List of dictionaries, each dictionary contains the data
        of a venue. 
    
    Returns: 
        list: List of lists containing the desired venue data for all venues. 
    """
    venues = []
    # Loop through venue_list, each element is a nested dictionary containing 
    # data for a venue
    for venue in venues_list:
        # Foursquare ID, unique reference for the venue
        fsq_id = venue['fsq_id']
        latitude = venue['geocodes']['main']['latitude']
        longitude = venue['geocodes']['main']['longitude']
        venue_name = venue['name']
        # address and postcode data not always present, so use error handling
        try: 
            venue_address = venue['location']['address']
        except KeyError:
            venue_address = ''
        try:
            venue_postcode = venue['location']['postcode']
        except KeyError:
            venue_postcode = ''
            
        venues.append([fsq_id, venue_name, latitude, longitude, 
                       venue_address, venue_postcode])
    return venues

### Visualise and Project Search Area
The plot_search_area function uses plotly to visualise the search area (in geographic coordinates i.e. latitude and longitude coordinates). The plot is interactive, allowing you to zoom in and out. 

Another function was created to project the search area from latitude longitude coordinates to a flat coordinate system. The flat projection can be altered, simply change the value provided to the 'projection' argument when this function is called in the **search_area** function (defined later). 

Both functions are defined below:  

In [9]:
def plot_search_area(min_lat, max_lat, min_long, max_long):
    """Plot search area (in geographic coordinates)
    
    Args: 
        max_lat (float): maximum latitude of search area. 
        min_lat(float): minimum latitude of search area. 
        max_long (float): maximum longitude of search area. 
        min_long (float): minimum longitude of search area.
    
    Returns:
        (NA): Does not return anything. Creates and displays search
            area using Plotly. 
    """
    # Trace out search area perimeter, starting and ending on top left 
    # corner (max lat and min long)
    search_boundaries = [[max_lat, min_long],[max_lat, max_long], 
                         [min_lat, max_long], [min_lat,min_long], 
                         [max_lat, min_long]]
    # Convert to Dataframe
    df_boundaries = pd.DataFrame(search_boundaries, 
                                 columns = ['latitude', 'longitude'])

    # Plot as square area on a map
    fig = px.line_mapbox(df_boundaries, lat="latitude", lon="longitude", zoom = 9)
    fig.update_layout(mapbox_style="open-street-map", mapbox_zoom = 9, 
                      margin = {"r":0,"t":0,"l":0,"b":0})
    fig.update_layout(autosize = False, width = 800, height = 300,)
    fig.show()

def project_search_area(max_lat, min_lat, max_long, min_long, projection):
    """Create search area and project to flat coordinate system. 
    
    Args: 
        max_lat (float): maximum latitude of search area. 
        min_lat(float): minimum latitude of search area. 
        max_long (float): maximum longitude of search area. 
        min_long (float): minimum longitude of search area. 
        projection (string): Name of coordinate reference system the search 
            area will be projected to e.g. 'ESRI:102013'. Will accept anything 
            that would be accepted by the geopandas .to_crs() function.
    
    Returns: 
        (Geopandas DataFrame): A dataframe with a single row that defines the 
            search area in projected coordinates. 
    """
    # We need to represent the search area coordinates in a geopandas dataframe. 
    # First we need to build a polygon to represent the area: 
    lats_of_search_area = [max_lat, max_lat, min_lat, min_lat, max_lat]
    longs_of_search_area = [min_long, max_long, max_long,min_long, min_long]
    # Convert max and min lat and long values to a polygon
    search_area_polygon = Polygon(zip(longs_of_search_area, 
                                      lats_of_search_area))
    # Now we turn the polygon to a geopandas dataframe
    search_area_gdf = gpd.GeoDataFrame(index=[0], crs='epsg:4326',
                                       geometry=[search_area_polygon])  
    # project to flat coordinate system and return
    return search_area_gdf.to_crs(projection)

### Search For Venues
The **search_area** function performs the Place Search API call across the entire search area until every part of it has been covered by an API call i.e. every part has been searched for venues. With each API call, it will update the remaining search area (by removing a circular area from it that is representative of the last API call). It will periodically print the remaining search area to screen. When the entire area is searched, it returns a list of lists detailing all venues found.

```
search_area = project_search_area(max_lat, min_lat, max_long, min_long, 'ESRI:102013')
```
The following functions are used by the **search_area** function: 
- **get_search_location** - Finds a random location within the remaining search area and returns this location in latitude and longitude coordinates (required for the API call) and in the flat coordinates (required to remove the circular area from the search area). 
- **update_search_area** - Removes the provided circular search area from the remaining search area. 

In [10]:
def get_search_location(search_area):
    """Return search location and its latitude longitude coordinate.
    
    Args: 
        search_area (Geopandas DataFrame): A single row dataframe showing the 
        remaining search area. 
    Returns: 
        (tuple): The next search location, defined in projected coordinates as
            a Geopandas Series, and its Comma separated latitude longitude 
            coordinate i.e. (search_location, lat_long). 
    """
    # Get next search location using representative_point() function (finds 
    # single point within the search area). 
    search_location = search_area.representative_point()
    # Convert location to lat long coordinate system
    search_location_lat_long = search_location.to_crs('epsg:4326')
    # Extract latitude and longitude
    latitude = search_location_lat_long.y.values[0]
    longitude = search_location_lat_long.x.values[0]
    # convert to string format 'latitude,longitude'. Note that we forced the 
    # longitude to show up to 20 decimal places to avoid the output being 
    # displayed in scientific notation which breaks the API call. 
    lat_long = f"{latitude},{longitude :.20f}"
    
    return (search_location, lat_long)

def update_search_area(search_area, area_just_searched):
    """Remove the area_just_searched from search_area"""
    return search_area.overlay(area_just_searched, how="difference")

def search_area(search_area_coords, initial_search_radius, cats, 
                time_between_updates, search_no):
    """Exhaustively Search area for venues using the Foursquare API. 
    
    Args: 
        search_area_coords (list): The max and min latitude and longitude 
            coordinates that define the search area. Must be in 
            form: [min_lat, max_lat, min_long, max_long] 
        initial_search_radius (int): The radius used in the first Foursquare 
            API call at each search location. If the search returns 50 results,
            the API call is repeated using a reduced radius.  
        cats (string): Comma seperated list of Foursquare venue category 
            codes e.g. '11131,11039'
        time_between_updates (int): Seconds between progress updates being output.
        search_no (int): This number is printed as part of the progress update. 
            It is used to keep track of how many times the entire search area has 
            been searched when using the Multi-Search.
        
    Returns:
        (tuple): 2 items are returned via a tuple. The first is a List of lists
            containing all venues found in the search. For each venue, it records 
            the unique forusquare id, venue name, latitude, longitude and address.
            The second item is the total number of API calls made during the venue 
            search. It is an integer.  
    """
    # Print search parameters
    print(f"{initial_search_radius = }")
    print(f"{cats = }")
    print(f"{time_between_updates = }")
    print(f"{multi_search = }\n")

    # Create projected search area from max min lat longitude coords
    search_area = project_search_area(max_lat, min_lat, 
                               max_long, min_long, 'ESRI:102013')

    data = []
    # Initialise counters, flags and timer
    time_last_update = time.time()
    total_calls = 0
    number_needed_retries = 0
    locations_searched_in_last_update = 0
    radius_too_big = False 

    while(len(search_area)>0):
        # Replace current cell output with next new output. 
        # Allows the search area map to keep being updated without 
        # keeping old maps visible.  
        clear_output(wait=True)
        # Get next search location and its latitude and longitude
        search_location, lat_long = get_search_location(search_area)
        #print(lat_long)
        # Set search_radius_not_found flag.  
        search_radius_not_found = True
        search_radius = initial_search_radius

        # Loop until a search radius is found that returns less than 50 results.
        while search_radius_not_found:
            # Count each time API called
            total_calls += 1
            # Make API call to search for venues from search location
            response = api_call(lat_long, search_radius, cats, key)
            # If response status code isnt 200, there was an API error. Print 
            # the API error, wait 5 seconds and try API call again. Loop until 
            # status 200 code returned by API. 
            while response.status_code != 200:
                print(f"API Error code {response.status_code}, sleeping 5 seconds")
                print(f"API Call Parameters: {lat_long = }, {search_radius = }")
                time.sleep(5)
                response = api_call(lat_long, search_radius, cats, key)
            
            results = json.loads(response.text)
            # Extract list of dictionaries defining search results
            venues_list = results['results']
            # If 50 venues found, reduce search radius then start while loop again
            if len(venues_list) > 49:
                search_radius = int(search_radius / 2)
                radius_too_big = True 
            else: 
                # End while loop
                search_radius_not_found = False

        # iterate number of locations searched
        locations_searched_in_last_update += 1
        # iterate IF the search radius has been reduced
        number_needed_retries += radius_too_big
        # reset flag
        radius_too_big = False
        
        # Extract venue data from venue_list
        data.extend(extract_venues(venues_list))
        
        # We will now define the circular area we just searched and remove it
        # from the search area. We will reduce the radius of our circle by 20 
        # to account for any distance innaccuracies by Foursquare or our 
        # projected distances i.e. we remove a smaller circle from our search 
        # area than we actually searched for with the API.  
        if search_radius > 20: 
            adjusted_radius = search_radius - 20
        else:
            # If the search_radius was smaller than 20, reduce it by 10% (to 
            # avoid negative radius)
            adjusted_radius = int(0.9 * search_radius)

        # Create area just searched - a circle with adjusted_radius. 
        area_just_searched = gpd.GeoDataFrame(
            search_location.buffer(adjusted_radius)
        )
        # Sets the buffer polygon above explicitely as the geometry
        area_just_searched = area_just_searched.set_geometry(0)
        # Update search area by removing the area_just_searched from it
        search_area = update_search_area(search_area, area_just_searched)
        
        # Periodic update 
        if time.time() - time_last_update > time_between_updates:
            # Plot the remaining search area
            ax = search_area.plot(alpha=0.5, color = 'red')
            title = f"Remaining Search Area"
            plt.title(title)
            # Add background map to search area plot - note the error handling 
            # because it occasionally has httperror
            try:
                cx.add_basemap(ax, crs = search_area.crs)
            except:
                pass
            # remove axis from plot
            plt.axis('off')
            plt.show()
            # Print periodic update
            print(f"\033[1mSearch {search_no}:\033[0m Number of locations "
                  + f"searched in last {time_between_updates} seconds --> " 
                  + f"{locations_searched_in_last_update}")
            # Calculate fraction of locations searched that required a smaller radius
            fraction_of_retries = number_needed_retries / locations_searched_in_last_update
            # If fraction more than 60% and at least 6 locations searched (to 
            # ensure a reasonable sized sample)
            if fraction_of_retries > 0.6 and locations_searched_in_last_update > 6:
                print(f"{number_needed_retries} out of {locations_searched_in_last_update} " 
                      + f"searches had too large radius --> {fraction_of_retries:.1%}")
                # Reduce Initial Search Radius by 20%
                initial_search_radius = int(initial_search_radius * 0.8)
                print(f"Reducing Initial Search Radius to {initial_search_radius}")

            # Reset counters and flags for next update
            time_last_update = time.time()
            locations_searched_in_last_update = 0 
            number_needed_retries = 0 

    return (data, total_calls)

### Control Function (including Multi-Search)

**perform_venue_search** is the function called to define and perform the venue search, all in a single line. It returns the venues found by the search in a pandas dataframe. The Multi-Search functionality (repeating the search of the whole geographic area multiple times until no new venues are found on a subsequent search) is implemented by this function. 

When using multi-search, once the search area has been entirely searched at least one time, you can exit the search and return the venues found so far via a keyboard interrupt (i.e. by stopping the code from running). This allows you to abandon the multi-search if it is taking too long without losing the venues found so far.

The perform_venue_search function uses 2 other functions: 
- **find_new_venues** - Used for multi-search. This function compares all venues found so far with the venues found in the latest search of the area and returns the number of new venues found. 
- **build_dataframe** - Converts the list of lists returned from the search_area function into a pandas dataframe. Removes duplicates and removes venues from outside the search area. 

In [11]:
def perform_venue_search(search_area_coords, initial_search_radius, 
                         cats, time_between_updates, multi_search): 
    """Perform venue search across geographic area and return venues found.
    
    If the multi-search argument is set to True, the venue search across the 
    geographic area will be repeated multiple times until subsequent searches 
    stop finding new venues (that were not found on any previous search of the 
    area).  
    
    Args: 
        search_area_coords (list): Defines the geographic area to be searched 
            via maximum and minimum latitude and longitude. Should be in the 
            format [min_lat, max_lat, min_long, max_long]. 
        initial_search_radius (int): The radius used in the first Foursquare 
            API call at each search location. If the search returns 50 results,
            the API call is repeated using a reduced radius. As the search 
            progresses, the intial search radius will be reduced dynamically 
            (when more API calls require a smaller radius). When using 
            multi-search, the initial search radius will always be reset to 
            the value provided by this argument when the search of the entire
            area is restarted. 
        cats (string): Comma seperated list of Foursquare venue category 
            codes e.g. '11131,11039'. This defines what category of venue will 
            be found by the search (e.g. if you set this to the code for bars, 
            you would only find venues that are categorised as bars).  
        time_between_updates(int): Seconds between progress updates being 
            output. 
        multi_search (bool): If False, will search the entire 
            geographic area a single time and return all venues found. If 
            True, will search the entire geographic area multiple times 
            until subsequent searches do not find any new venues (venues not 
            found on any previous search). It will search the entire area 
            a minimum of 3 times. After the first search has completed, you 
            can stop the code from running to abort the multi-search and 
            return all venues found so far. 
            
        Returns: 
            (pandas dataframe): All venues found by the venue search, 
                including basic information on each venue e.g. location, 
                name, unique id (fsq_id). 
    """
    # Enclose whole function in error handling. This will handle the 'keyboard
    # interrupt' error such that if you stop the code running, it will return 
    # all venues found so far.  
    try:
        # If multi-pass is False, perform one search of entire area and return 
        # venues found 
        if (multi_search == False):
            # Search area and return all venues found (as list of lists) and
            # total api calls 
            data, total_calls = search_area(search_area_coords, 
                                            initial_search_radius, 
                                            cats, time_between_updates, 1)
            # Convert venues found to pandas dataframe and remove duplicates 
            # and remove venues outside search area
            venues = build_dataframe(data)
            # Print search result summary
            print(f"\nTotal Venues Found -> {venues.shape[0]}")
            print(f"Total Calls -> {total_calls}\n")
            # Print search parameters
            print(f"\n{initial_search_radius = }")
            print(f"{cats = }")
            print(f"{time_between_updates = }")
            print(f"{multi_search = }\n")
            return venues
        # If multi-pass is True, perform multiple searches of area 
        # (until no new venues found)
        
        # track number of new venues found with each new search. Each 
        # element indicates the number of new venues found. It starts 
        # recording after the second search completes (i.e. the 0th element
        # will be the new venues found since the first search. The second 
        # element is the new venues found since the 3rd search etc.)
        new_venues_found = []
        print(f"Starting Search 1...\n")
        # Search area and return all venues found (as list of lists) and
        # total api calls 
        data, total_calls = search_area(search_area_coords, 
                                        initial_search_radius, 
                                        cats, time_between_updates, 1)
        # Convert venues found to pandas dataframe and remove duplicates 
        # and remove venues outside search area
        cum_venues = build_dataframe(data)
        # set the fsq_id (unique id) as the index to make comparing the contents 
        # of the dataframe easier using the pandas .join method
        cum_venues.set_index('fsq_id', inplace = True)
        # Flag variable that idicates if the area should be searched again 
        # (will restart searching until its False)
        need_more_passes = True
        # Tracks how many searches of the area have been performed (the second 
        # search is about to be started)
        search_no = 2
        # Keep searching the geographic area until need_more_passes is False
        while(need_more_passes):
            # Print summary of search results so far
            print(f"\nTotal Venues Found -> {cum_venues.shape[0]}")
            print(f"Total Calls -> {total_calls}")
            # new_venues_found is empty until 2 searches are completed.  
            if search_no > 2: 
                # Print out new venues found in each new search of the area
                print('\nMulti-Search Results:')
                for index, item in enumerate(new_venues_found):
                    print(f"Search {index + 2}: Found {item} new venues")
                    
            print(f"\nStarting Search {search_no}...\n")
            # Search area and return all venues found (as list of lists) and
            # total api calls 
            data, total_calls_latest = search_area(search_area_coords, 
                                                   initial_search_radius, 
                                                   cats, time_between_updates, 
                                                   search_no)
            # Update total api calls
            total_calls += total_calls_latest
            # Convert latest venues found to dataframe 
            latest_venues = build_dataframe(data)
            # set the fsq_id (unique id) as the index to make comparing the 
            #contents of the dataframe easier using the pandas .join method
            latest_venues.set_index('fsq_id', inplace = True)
            # Calculate and record number of new venues found in the latest 
            # search
            new_venues_found.append(find_new_venues(cum_venues, 
                                                    latest_venues)
                                   )
            # If min of 3 searches completed AND the latest search found no 
            # new venues
            if (search_no > 2) and new_venues_found[-1] == 0:
                # End multi-search (stop restarting search of whole area by 
                # ending while loop)
                need_more_passes = False

            search_no += 1
            # Add new venues found to all venues found in previous searches 
            # (cum_venues)
            cum_venues = pd.concat([cum_venues, latest_venues])
            # Remove duplicates on cum_venues (removes duplicates on the index 
            # which is a unique id for each venue)
            cum_venues = cum_venues[~cum_venues.index.duplicated(keep='first')]
        # Once multi-search completed (while loop has ended) print out summary 
        # of results
        print(f"\nTotal Venues Found -> {cum_venues.shape[0]}")
        print(f"Total Calls -> {total_calls}\n")
        # Print search parameters
        print(f"{initial_search_radius = }")
        print(f"{cats = }")
        print(f"{time_between_updates = }")
        print(f"{multi_search = }\n")          
        # Print new venues found in each subsequent search
        print('Multi-Search Results:')
        for index, item in enumerate(new_venues_found):
            print(f"Search {index + 2}: Found {item} new venues")
    
        return cum_venues.reset_index()
    # If keyboard interrupt triggered during search (if code stopped)
    except KeyboardInterrupt:
        clear_output(wait=True)
        print(f"Multi-Search Manually Ended. Completed {search_no - 1} Searches")
        print(f"\nTotal Venues Found -> {cum_venues.shape[0]}")
        print(f"Total Calls -> {total_calls}\n")
        # Print search parameters
        print(f"{initial_search_radius = }")
        print(f"{cats = }")
        print(f"{time_between_updates = }")
        print(f"{multi_search = }\n")          
        # Print new venues found in each subsequent search
        print('Multi-Search Results:')
        for index, item in enumerate(new_venues_found):
            print(f"Search {index + 2}: Found {item} new venues")
        
        return cum_venues.reset_index()

def find_new_venues(cum_venues, latest_venues):
    """Return number of venues in latest_venues that are not in cum_venues.
    
    The index of both dataframes should be the fsq_id which is a unique id 
    for each venue. The function calculates how many of the indexes in 
    latest_venues are not found in cum_venues i.e. the number of new venues 
    found.
    
    Args: 
        cum_venues (pd dataframe): Contains all venues found in all previous
            searches of the geographic area. The index should be the fsq_id 
            and there should be no duplicate entries.
        latest_venues (pd datafram): Contains all venues found in the latest
            search of the geographic area. The index should be the fsq_id 
            and there should be no duplicate entries.
            
    Returns:
        (int): Number of number of indexes in latest_venues are not found 
            in cum_venues i.e. the number of new venues. 
    """
    
    # merge latest_venues with cum_venues using left join on index (fsq_id)
    joined = latest_venues.join(cum_venues, how = 'left', 
                                lsuffix = '_latest', rsuffix = '_cum')
    # A nan entry in venue_name_cum column of 'joined' indicates the index was 
    # not found in cum_venues i.e. this is a new venue. Returns the sum of nan 
    # rows in venue_name_cum. 
    return joined.venue_name_cum.isna().sum()

def build_dataframe(data):
    """Convert venue data list of lists to dataframe. 
    
    Removes duplicate venues and removes venues from outside search area.
    
    Args: 
        data (list of lists): Venue data returned by the search_area function.
            It is ready to be converted into a pandas dataframe and contains 
            basic data on each venue found by the search e.g. venue name, 
            location etc.
            
    Returns: 
        (pd dataframe): Venues found by the search in pandas dataframe format.
            Duplicate venues have been removed and any venues located outside 
            the search area are also removed. 
    """
    venues = pd.DataFrame(data, columns = ['fsq_id', 'venue_name', 
                                           'latitude', 'longitude', 
                                           'address', 'postcode'])
    # remove duplicates in fsq_id, the unique id for each venue
    venues = venues.drop_duplicates(subset=['fsq_id'])
    # filter out venues whose location is outside the search area. 
    venues = venues[(min_lat <= venues.latitude) & (venues.latitude <= max_lat) 
                    & (venues.longitude > min_long) & (venues.longitude < max_long)]
    return venues

## Venue Search
The **geographic area** of the venue search is defined in this section, followed by performing the venue search. 
### Define Geographic Area
The following variables define the **geographic area**: 
- min_lat
- max_lat
- min_long
- max_long

These variables are defined in the cell below. You should edit their values in this cell to define the Geographic Area you wish to search. The area you have defined will be visualised when you run the cell. Note you can repeatedly edit these variables and re-run the cell to fine tune or change your search area.    

In [17]:
# Define search area via min and max latitude and longitude

min_lat = 47.491911
max_lat =  47.734145
min_long = -122.435977
max_long = -122.224433


search_area_coords = [min_lat, max_lat, min_long, max_long]

plot_search_area(min_lat, max_lat, min_long, max_long)

### Setting Search Arguments

The venue search is defined and performed using the **perform_venue_search** function. This function takes multiple arguments which define how the search is performed. These arguments must all be provided and some require a certain amount of tuning to optimise the search (where optimisation means speeding up the search and minimising the number of API calls).  

The table below defines these arguments and provides advice on setting / tuning them: 

|Argument Name|Description|Setting / Tuning Advice|
|:---:|---|---|
|**search_area_coords**|List defining the search area via maximum and minimum latitude and longitude values.|This is set in the cell above, no special tuning is required, set based on where you want to search for venues.|
|**initial_search_radius**|The API is given a search location and a radius and returns up to 50 venues found within the provided radius of the search location. The Initial Search Radius is the first radius provided to the API whenever a new location is searched for venues. If 50 venues are returned, the radius is reduced and the API is called again until less than 50 venues are returned.<br><br>The Initial Search Radius will slowly reduce from the initial value it is set at. An update is printed to screen periodically. Whenever more than 60% of search locations (since the last update) require a smaller radius, the Initial Search Radius is Reduced and this is recorded in the next update. <br><br>If using multi-search, the Initial Search Radius is reset to the original value set by this argument each time the search of the area is re-run. |You want this to be as large as possible without reducing the Initial Search Radius on the first update. Around 2000 is a good starting point.|
|**time_between_updates**|Time period (in seconds) between printing updates to screen. This value is also the time period used to determine if the Initial Search Radius will be reduced.|You want fairly frequent updates so that the Initial Search Radius can be reduced optimally. We suggest you start with 20 seconds. Make sure you search at least 8 different search locations per update, otherwise you'll be deciding whether to reduce the Initial Search Radius on a small sample.|
|**cats**|Category filter - defines what category of venue you want the search to find e.g. bars only. Must be a comma separated string e.g. '11131,11039'. <br><br>If you do not want to filter venues by category, set as blank string ''|Find a list of category codes [here](https://location.foursquare.com/places/docs/categories).|
|**multi-search**|A single search of the entire geographic area typically returns ~90-95% of venues within the area. To get the final ~10% of venues, you need to re-run the search multiple times. The multi-search function will make the search re-run multiple times until the latest search does not find any new venues i.e. once ~100% of venues have been found.<br><br>Note - when using multi-search, you don't need to wait until the multi-search completely finishes. You can stop the code running anytime after at least 1 search has been completed and all venues found so far will be returned (you will not lose them).|Set as **False** if you do not want to use multi-search. <br>Set as **True** if you want to use multi-Search. Note that multi-search will significantly increase the run time and number of API calls. We suggest you switch off multi-search for your first run. Make sure the search completes correctly and tune the Initial Search Radius. Then you can switch on multi-search.|

### Example Tuned Searches
Below we provide 2 examples of search arguments we tuned for our computer. Both examples use the original geographic area defined in the above cell (London, England). We changed the categories being searched for, and this in turn changed the optimal Initial Search Radius: 

**Event Spaces In London** - *returns ~ 300 results, very fast to run*
- initial_search_radius = 1500
- cats = '11131,11039'
- time_between_updates = 20
- multi-search = False

**Bars and Restaurants in London** - *returns ~ 6000 results, 15 minutes to run*
- initial_search_radius = 1000
- cats = '10039,10040,10041,10045,10049,11039,13003'
- time_between_updates = 20
- multi-search = False

### Perform Venue Search
This is where we perform the venue search. Edit the search arguments (explained in the table above) then run the cell to perform the venue search: 

In [18]:
initial_search_radius = 650
# The below commented out categories were used in testing. 
# They return event spaces only, and there are only around 
# 330 of them in the geographic area, so it makes a useful
# fast test search
#cats: 10027 (Museum), 13000 (Dining and Drinking), 16000 (Landmarks and Outdoors)

cats = '10027,13000,16000'
time_between_updates = 20
multi_search = True

venues = perform_venue_search(search_area_coords, initial_search_radius, 
                            cats, time_between_updates, multi_search)



Total Venues Found -> 5461
Total Calls -> 3273

initial_search_radius = 650
cats = '10027,13000,16000'
time_between_updates = 20
multi_search = True

Multi-Search Results:
Search 2: Found 17 new venues
Search 3: Found 0 new venues


### Preview Search Results 

In [20]:
venues.head()

Unnamed: 0,fsq_id,venue_name,latitude,longitude,address,postcode
0,596bfbe6920540752e4f4534,Romio's Pizza Pub,47.612972,-122.329457,1011 Pike St,98101
1,4e841231d3e393b3e134aa42,The Pillars,47.613727,-122.328873,,98101
2,5df29f0e059a360009132dfc,Zaika,47.613821,-122.3288,1100 Pike St,98101
3,56995b4b498e30d566717b04,Daawat Indian Grill,47.612652,-122.331429,820 Pike St,98101
4,84f2b446bdc54b7b0d24a581,Chutneys Queen Anne,47.613821,-122.3288,1100 Pike St,98101


### Save Search Results

In [21]:
todays_date = datetime.today().strftime('%d-%b-%y')
venues.to_csv('foursquare_venues ' + todays_date + '.csv', index = False)

Uncomment the below code if you want to load the saved venues

In [6]:
#venues = pd.read_csv('foursquare_venues 15-Feb-23.csv')

### Price Calculation
The below calculates the cost for a certain number of Foursquare API calls. You can edit the **total_requests** variable and run the cell to estimate the total cost for that many API calls. 

In [22]:
total_requests = 11986
cost_per_request = 0.50
total_price_in_dollars = total_requests * cost_per_request / 100
print(f"Total price for {total_requests} requests is ${total_price_in_dollars}")

Total price for 11986 requests is $59.93


## Get Venue Details
The Foursquare API is now used to pull extended venue data on each venue we found in the Venue Search. 

In [23]:
# Base url of the Get Place Details API (requires the fsq_id of 
# the venue after 'places/' 
venue_details_url = "https://api.foursquare.com/v3/places/"

# RateLimiter decorator, ensures the api_call function is called less 
# than 50 times per second, the Foursquare rate limit
@RateLimiter(max_calls=49, period=1)
def get_venue_details(fsq_id, key):
    """Get venue details using foursquare Get Place Details API
    
    Note that the fsq_id is appended to the API url, it is not a 
    paramater of the html request.  
    
    Args:
        fsq_id (string): Foursquare ID for venue you are requesting details for
        key (string): Authorisation key (links request to account paying for request) 
    
    Returns: 
        Html response from Foursquare that contains extended data on the venue
    """
    # Fields paramater defines what data foursquare will return
    fields = ['location', 'categories', 'chains', 'related_places',
              'tel', 'fax', 'email', 'website', 'verified', 
              'hours_popular', 'rating', 'stats', 'popularity', 
              'price', 'date_closed', 'tastes']
    # Convert fields to comma separated list
    fields = ','.join(fields)
    params = dict(fields = fields)
    headers = {
        "Accept": "application/json",
        "Authorization": key
    }
    # fsq_id is appended to end of API request url
    return requests.request("GET", venue_details_url + fsq_id, 
                            headers = headers, params = params)

Below we perform a test to check the API request works. It should display a json containing data on the requested venue. 

In [24]:
# Perform Get Place Details API call
response = get_venue_details(venues.fsq_id[13], key)
# extract venue_details from response as json file
venue_details = json.loads(response.text)
# Visualise venue_details
JSON(venue_details)

<IPython.core.display.JSON object>

### Extract Data From API Response
Below we define functions to extract the data we want from the API response. Note that the API response is first converted to a json file. This file is then provided to the **extract_venue_details** function which extracts the data. 

The table below defines the data that is extracted from the json.

Data|Description
:---:|:---
email|contact email address of venue
tel|contact telephone number of venue
website|website of venue
verified|A boolean that indicates whether or not the FSQ Place has been claimed.
rating|A numerical rating (from 0.0 to 10.0) of the FSQ Place, based on user votes, likes/dislikes, tips sentiment, and visit data. Not all FSQ Places will have a rating.
popularity|Measure of the FSQ Place's popularity, by foot traffic. This score is on a 0 to 1 scale and uses a 6-month span of POI visits for a given geographic area.
price|A numerical value (from 1 to 4) that best describes the pricing tier of the FSQ Place, based on known prices for menu items and other offerings. Values include: 1 = Cheap, 2 = Moderate, 3 = Expensive, and 4 = Very Expensive
date_closed|The recorded date when the FSQ Place was marked as permanently closed in Foursquare's databases. This does not necessarily indicate the POI was actually closed on this date.
categories|An array, possibly empty, of categories that describe the FSQ Place. Stored as a list of strings. 
total_ratings|Total number of ratings for venue 
parent|If the venue is owned by a parent venue, the parent venue's name will be listed 
parent_fsq_id|If the venue is owned by a parent venue, the parent venue's foursquare ID will be listed 
neighborhood|Description of the neighborhood the venue is located in e.g. 'Clapham'. When initially extracted it is stored as a one item list.

In [25]:
def extract_venue_details(venue_details):
    """Extract data from venue_details json as a list
    
    Args:
        venue_details (json): The json file containing the venue details data.
        
    Returns:
        list: The data for the venue, e.g. 'email', 'price' etc. in a single 
            list. All data that couldnt be found is nan."""
    fields = venue_details.keys() 
    # Extract simple data from venue_details json
#     email = extract_simple_data('email', venue_details, fields)
#     tel = extract_simple_data('tel', venue_details, fields)
#     website = extract_simple_data('website', venue_details, fields)
    verified = extract_simple_data('verified', venue_details, fields)
    rating = extract_simple_data('rating', venue_details, fields)
    popularity = extract_simple_data('popularity', venue_details, fields)
    price = extract_simple_data('price', venue_details, fields)
    date_closed = extract_simple_data('date_closed', venue_details, fields)

    # total_ratings is stored under 'stats' within the venue_details json. Below 
    # we extract the 'stats' dict from venue_details json and then extract 
    # total_ratings from that.  
    stats = extract_simple_data('stats', venue_details, fields)
    # Check stats wasn't nan
    if stats == stats:
        total_ratings = stats['total_ratings']
    else: 
        total_ratings = np.nan

    # Extract categories as a list of possible categories. If categories not
    # in json, return nan. 
    if 'categories' in fields:
        # Categories stored as a list in the venue_details json
        categories_list = venue_details['categories']
        categories = []
        # Loop through categories_list and extract the string name of each
        # category in the list. 
        for category in categories_list:
            categories.append(category['name'])        
    else:
        categories = np.nan

    # Extract parent from 'related_places' field. 'related_places' always
    # exists but contains an empty dictionary most of the time. We only 
    # want to extract the parent data if the parent field exists within 
    # the 'related_places' dictionary. 
    related_places = venue_details['related_places']
    # Check if 'related_places' dic contains something
    if len(related_places) > 0:
        # Check if 'parent' contained within 'related_places'
        if 'parent' in related_places.keys():
            parent = related_places['parent']['name']
            parent_fsq_id = related_places['parent']['fsq_id']
        else: 
            parent = np.nan
            parent_fsq_id = np.nan
    else: 
        parent = np.nan
        parent_fsq_id = np.nan

    # Extract 'neighborhood' from the 'location' field. 'location' stores 
    # data as a dict.
    location_fields = venue_details['location'].keys()
    # Can repurpose the 'extract_simple_data' function to extract
    # 'neighborhood' data from the 'location' dictionary. 
    locality = extract_simple_data('locality', 
                                       venue_details['location'], 
                                       location_fields)

    return [verified, rating, popularity, price, 
           date_closed, categories, total_ratings, parent, 
           parent_fsq_id, locality]

def extract_simple_data(field, venue_details, fields):
    """Extracts data from venue_details json using the associated field
    
    Args:
        field (str): The field you wish to extract the value of e.g. 'email'.
        venue_details (json): The json file containing the data.
        fields (list): All fields contained by the json file. 

    Returns:
        bool/str/float/dict/np.nan: Returns the data stored in venue_details json 
            under the provided field. If the provided field does not exist in 
            the venue_details json, returns np.nan. 
    """
    if field in fields:
        return venue_details[field]
    else:
        return np.nan

### Get Venue Details
Below we request and extract extended data for every venue we found. It takes roughly 35 minutes to run the code and cost ~ $60 for around 6000 venues.

In [26]:
venue_details_data = []
total_venues = venues.shape[0]
# Loop through venue foursquare ids (fsq_id), request venue details from
# API and extract required data
for index, fsq_id in enumerate(venues.fsq_id):
    # Perform Get Place Details API call
    response = get_venue_details(fsq_id, key)
    # Keep retrying API call every 5 seconds if the API response code
    # indicates an error (i.e. not 200) 
    while response.status_code != 200:
            print(f"API Error code {response.status_code}, sleeping 5 seconds")
            time.sleep(5)
            # retry API call
            response = get_venue_details(fsq_id, key)
    # extract venue_details from response as json file
    venue_details = json.loads(response.text)
    # Extract data from venue_details json and append to list
    extracted_data = extract_venue_details(venue_details)
    # Add fsq_id to first entry of extracted_data list
    extracted_data = [fsq_id] + extracted_data
    venue_details_data.append(extracted_data)
    #print(index)
    # Provide status update every time 500 venues details have been extracted
    if (index + 1) % 500 == 0:
        print(f"{index + 1} venue details extracted out of {total_venues}")

500 venue details extracted out of 5461
1000 venue details extracted out of 5461
1500 venue details extracted out of 5461
2000 venue details extracted out of 5461
2500 venue details extracted out of 5461
3000 venue details extracted out of 5461
3500 venue details extracted out of 5461
4000 venue details extracted out of 5461
4500 venue details extracted out of 5461
5000 venue details extracted out of 5461


## Clean And Save Data
### Build Dataframe
We need to clean and restructure parts of the venue details data. Below, we create a dataframe with the venue details data.

In [27]:
venue_details_df = pd.DataFrame(venue_details_data, 
                             columns = ['fsq_id', 'verified', 
                                        'rating', 'popularity', 'price', 
                                        'date_closed', 'categories', 
                                        'total_ratings', 'parent', 
                                        'parent_fsq_id', 'locality'])

### Save Data
Save data in case of any issues / to avoid having to re-request all the venue details. 

In [28]:
todays_date = datetime.today().strftime('%d-%b-%y')
venue_details_df.to_csv('venue_details_' + todays_date + '.csv',
                        index = False)

Optional code - used to load the saved venue details data if you need to. 

In [3]:
# Load in saved raw venue details data if required 
from ast import literal_eval
venue_details_df = pd.read_csv('venue_details_14-Jul-23.csv', converters={"categories": literal_eval})

NameError: name 'pd' is not defined

In [54]:
venue_details_df.shape

(5461, 11)

### Expand 'Categories' Column

Foursquare categorises venues using category such as 'Bar', 'Restaurant' etc. Every venue can be assigned up to 3 categories. There are hundreds of possible categories. The 'categories' column contains a list with the categories each venue is assigned to e.g. \[Arts and Entertainment, Pub, English Restaurant]. 

Because any venue can have any combination of up to 3 categories, there is no simple way (e.g. a single column) of representing this data. The simplest way is to One Hot Encode the data i.e. create a new column for each possible category and assign it a 1 or 0 depending on whether the venue has that category.  

Below we one hot encode the 'categories' column (as a new dataframe) and summarise how common each category is. 

In [55]:
# Dummy variable the lists in the 'categories' column such that a new 
# column is made for each category named in the lists and it will 
# have a 0 or a 1 depending on whether that category was included 
# in the list for that row.   
categories_df = venue_details_df['categories'].str.join('|').str.get_dummies()
# Expand pandas viewing options so full series can be printed 
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
# Sum up each category and sort descending to see which 
# categories are most common 
categories_sum = categories_df.sum().sort_values(ascending = False)
categories_sum

American Restaurant                 541
Park                                522
Restaurant                          444
Coffee Shop                         440
Bar                                 270
Pizzeria                            249
Fast Food Restaurant                238
Café                                234
Sandwich Spot                       230
Cocktail Bar                        221
Playground                          213
Bakery                              181
Burger Joint                        156
Mexican Restaurant                  150
Asian Restaurant                    144
Dining and Drinking                 142
Landmarks and Outdoors              141
Italian Restaurant                  137
Deli                                137
Harbor or Marina                    134
Seafood Restaurant                  121
Thai Restaurant                     117
Lounge                              117
Brewery                             106
Monument                            105


Now we will take a look at the popular categories that have a higher than average popularity rating

In [136]:
mean = venue_details_df.popularity.mean()

popular_cat = venue_details_df[venue_details_df['popularity'] > mean]['categories']
popular_cat.head(50)

unique_strings = []

# Iterate over each element in the object
for item in popular_cat:
    # Check if the element is a list
    if isinstance(item, list):
        # Iterate over each string in the list
        for string in item:
            # Check if the string is not already in the unique_strings list
            if string not in unique_strings:
                # Append the string to the unique_strings list
                unique_strings.append(string)

unique_strings

['Scenic Lookout',
 'Bar',
 'Indian Restaurant',
 'Buffet',
 'Cocktail Bar',
 'Lounge',
 'Caribbean Restaurant',
 'Dog Park',
 'Coffee Shop',
 'Dim Sum Restaurant',
 'Shanghai Restaurant',
 'Dumpling Restaurant',
 'Pub',
 'American Restaurant',
 'Wine Bar',
 'New American Restaurant',
 'Restaurant',
 'Seafood Restaurant',
 'Mediterranean Restaurant',
 'Middle Eastern Restaurant',
 'Bakery',
 'Café',
 'Japanese Restaurant',
 'Chinese Restaurant',
 'Burger Joint',
 'Fast Food Restaurant',
 'Steakhouse',
 'Dive Bar',
 'Gay Bar',
 'Sushi Restaurant',
 'Italian Restaurant',
 'Hotel Bar',
 'Latin American Restaurant',
 'Mexican Restaurant',
 'Sandwich Spot',
 'Cafe, Coffee, and Tea House',
 'Bagel Shop',
 'Pizzeria',
 'Beer Bar',
 'Beer Garden',
 'Arcade',
 'Dessert Shop',
 'Asian Restaurant',
 'Korean Restaurant',
 'Dining and Drinking',
 'Sports Bar',
 'Comfort Food Restaurant',
 'Landmarks and Outdoors',
 'Pie Shop',
 'Thai Restaurant',
 'Playground',
 'Plaza',
 'Park',
 'Modern European 

In [57]:
exploded_df = venue_details_df.loc[venue_details_df.index.repeat(venue_details_df['categories'].str.len())]

# Reset the index of the exploded dataframe
exploded_df.reset_index(drop=True, inplace=True)

# Explode the 'categories' column by assigning the list of categories to a new column
exploded_df['exploded_categories'] = [item for sublist in venue_details_df['categories'] for item in sublist]

# Apply one-hot encoding using get_dummies on the exploded dataframe
one_hot_encoded_df = pd.get_dummies(exploded_df['exploded_categories']).astype(int)

# Concatenate the original dataframe with the one-hot encoded dataframe
venue_details_df = pd.concat([venue_details_df, one_hot_encoded_df], axis=1)

venue_details_df = venue_details_df.dropna(subset=['fsq_id'])

venue_details_df.reset_index(drop=True, inplace=True)
venue_details_df

Unnamed: 0,fsq_id,verified,rating,popularity,price,date_closed,categories,total_ratings,parent,parent_fsq_id,locality,African Restaurant,Agriculture and Forestry Service,American Restaurant,Amusement Park,Arcade,Argentinian Restaurant,Art Museum,Arts and Entertainment,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Bathing Area,Bavarian Restaurant,Bay,Beach,Beer Bar,Beer Garden,Belgian Restaurant,Bicycle Store,Bike Trail,Bistro,Botanical Garden,Bowling Alley,Brasserie,Brazilian Restaurant,Breakfast Spot,Brewery,Bridge,Bubble Tea Shop,Buffet,Burger Joint,Burrito Restaurant,"Cafe, Coffee, and Tea House",Cafeteria,Café,Cajun and Creole Restaurant,Cambodian Restaurant,Campground,Cantonese Restaurant,Caribbean Restaurant,Casino,Cha Chaan Teng,Chinese Restaurant,Cidery,Cocktail Bar,Coffee Shop,College and University,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Creperie,Cuban Restaurant,Cupcake Shop,Deli,Dessert Shop,Dim Sum Restaurant,Diner,Dining and Drinking,Distillery,Dive Bar,Dive Spot,Dog Park,Donut Shop,Drive-in Theater,Dumpling Restaurant,Dutch Restaurant,Eastern European Restaurant,Egyptian Restaurant,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Fast Food Restaurant,Filipino Restaurant,Fish Market,Fish and Chips Shop,Food Court,Food Stand,Food Truck,Food and Beverage Retail,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Garden,Gastropub,Gay Bar,Gelato Shop,German Restaurant,Greek Restaurant,Grocery Store,Halal Restaurant,Harbor or Marina,Hawaiian Restaurant,Hiking Trail,Historic and Protected Site,History Museum,Hong Kong Restaurant,Hookah Bar,Hot Dog Joint,Hot Spring,Hotel Bar,Hotpot Restaurant,Hungarian Restaurant,Ice Cream Parlor,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Iraqi Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Korean BBQ Restaurant,Korean Restaurant,Kosher Restaurant,Lake,Landmarks and Outdoors,Latin American Restaurant,Lebanese Restaurant,Lighthouse,Lounge,Malay Restaurant,Meadery,Meat and Seafood Store,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Mongolian Restaurant,Monument,Moroccan Restaurant,Mountain,Movie Theater,Museum,Music Venue,National Park,Nature Preserve,New American Restaurant,Night Club,Night Market,Noodle Restaurant,North Indian Restaurant,Other Great Outdoors,Pakistani Restaurant,Park,Pastry Shop,Pedestrian Plaza,Peking Duck Restaurant,Peruvian Restaurant,Pet Café,Piano Bar,Picnic Area,Pie Shop,Pizzeria,Playground,Plaza,Poke Restaurant,Polish Restaurant,Pool Hall,Portuguese Restaurant,Poutine Restaurant,Pub,Puerto Rican Restaurant,RV Park,Ramen Restaurant,Restaurant,River,Rock Climbing Spot,Rock Club,Roof Deck,Rooftop Bar,Russian Restaurant,Sake Bar,Salad Restaurant,Salvadoran Restaurant,Sandwich Spot,Scandinavian Restaurant,Scenic Lookout,Science Museum,Scottish Restaurant,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shawarma Restaurant,Smoothie Shop,Soup Spot,South American Restaurant,Southern Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sports Bar,Sri Lankan Restaurant,State or Provincial Park,Steakhouse,Street Food Gathering,Sushi Restaurant,Szechuan Restaurant,Taco Restaurant,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theater,Tiki Bar,Travel and Transportation,Turkish Restaurant,Udon Restaurant,Urban Park,Vegan and Vegetarian Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Winery,Wings Joint
0,596bfbe6920540752e4f4534,False,,,,,"[Salad Restaurant, Pizzeria, Bar]",,,,Seattle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,4e841231d3e393b3e134aa42,False,,0.897207,,,[Scenic Lookout],,,,Seattle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,5df29f0e059a360009132dfc,False,6.9,0.980501,2.0,,"[Bar, Indian Restaurant]",12.0,,,Seattle,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,56995b4b498e30d566717b04,False,7.7,0.989678,2.0,,"[Buffet, Indian Restaurant]",69.0,,,Seattle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,84f2b446bdc54b7b0d24a581,False,,,,,[Seafood Restaurant],,,,Seattle,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5456,4c4baf4d42b4d13a845e007f,False,,0.442014,2.0,,[BBQ Joint],,,,Seattle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5457,e307b5e251ad4f1a60b8ed60,False,,,,,[Food Truck],,,,Seattle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5458,44397f33f964a5201e321fe3,True,7.9,0.922506,3.0,,[Italian Restaurant],150.0,,,Seattle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5459,5a220fca178a2a17a84893ab,False,6.8,0.946256,1.0,,[Fried Chicken Joint],43.0,,,Seattle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [58]:
drop_list = ['parent_fsq_id', 'parent', 'date_closed','categories','locality']

venue_categories_df = venue_details_df.drop(drop_list, axis=1)

venue_categories_df



Unnamed: 0,fsq_id,verified,rating,popularity,price,total_ratings,African Restaurant,Agriculture and Forestry Service,American Restaurant,Amusement Park,Arcade,Argentinian Restaurant,Art Museum,Arts and Entertainment,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Bathing Area,Bavarian Restaurant,Bay,Beach,Beer Bar,Beer Garden,Belgian Restaurant,Bicycle Store,Bike Trail,Bistro,Botanical Garden,Bowling Alley,Brasserie,Brazilian Restaurant,Breakfast Spot,Brewery,Bridge,Bubble Tea Shop,Buffet,Burger Joint,Burrito Restaurant,"Cafe, Coffee, and Tea House",Cafeteria,Café,Cajun and Creole Restaurant,Cambodian Restaurant,Campground,Cantonese Restaurant,Caribbean Restaurant,Casino,Cha Chaan Teng,Chinese Restaurant,Cidery,Cocktail Bar,Coffee Shop,College and University,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Creperie,Cuban Restaurant,Cupcake Shop,Deli,Dessert Shop,Dim Sum Restaurant,Diner,Dining and Drinking,Distillery,Dive Bar,Dive Spot,Dog Park,Donut Shop,Drive-in Theater,Dumpling Restaurant,Dutch Restaurant,Eastern European Restaurant,Egyptian Restaurant,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Fast Food Restaurant,Filipino Restaurant,Fish Market,Fish and Chips Shop,Food Court,Food Stand,Food Truck,Food and Beverage Retail,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Garden,Gastropub,Gay Bar,Gelato Shop,German Restaurant,Greek Restaurant,Grocery Store,Halal Restaurant,Harbor or Marina,Hawaiian Restaurant,Hiking Trail,Historic and Protected Site,History Museum,Hong Kong Restaurant,Hookah Bar,Hot Dog Joint,Hot Spring,Hotel Bar,Hotpot Restaurant,Hungarian Restaurant,Ice Cream Parlor,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Iraqi Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Korean BBQ Restaurant,Korean Restaurant,Kosher Restaurant,Lake,Landmarks and Outdoors,Latin American Restaurant,Lebanese Restaurant,Lighthouse,Lounge,Malay Restaurant,Meadery,Meat and Seafood Store,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Mongolian Restaurant,Monument,Moroccan Restaurant,Mountain,Movie Theater,Museum,Music Venue,National Park,Nature Preserve,New American Restaurant,Night Club,Night Market,Noodle Restaurant,North Indian Restaurant,Other Great Outdoors,Pakistani Restaurant,Park,Pastry Shop,Pedestrian Plaza,Peking Duck Restaurant,Peruvian Restaurant,Pet Café,Piano Bar,Picnic Area,Pie Shop,Pizzeria,Playground,Plaza,Poke Restaurant,Polish Restaurant,Pool Hall,Portuguese Restaurant,Poutine Restaurant,Pub,Puerto Rican Restaurant,RV Park,Ramen Restaurant,Restaurant,River,Rock Climbing Spot,Rock Club,Roof Deck,Rooftop Bar,Russian Restaurant,Sake Bar,Salad Restaurant,Salvadoran Restaurant,Sandwich Spot,Scandinavian Restaurant,Scenic Lookout,Science Museum,Scottish Restaurant,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shawarma Restaurant,Smoothie Shop,Soup Spot,South American Restaurant,Southern Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sports Bar,Sri Lankan Restaurant,State or Provincial Park,Steakhouse,Street Food Gathering,Sushi Restaurant,Szechuan Restaurant,Taco Restaurant,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theater,Tiki Bar,Travel and Transportation,Turkish Restaurant,Udon Restaurant,Urban Park,Vegan and Vegetarian Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Winery,Wings Joint
0,596bfbe6920540752e4f4534,False,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,4e841231d3e393b3e134aa42,False,,0.897207,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,5df29f0e059a360009132dfc,False,6.9,0.980501,2.0,12.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,56995b4b498e30d566717b04,False,7.7,0.989678,2.0,69.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,84f2b446bdc54b7b0d24a581,False,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5456,4c4baf4d42b4d13a845e007f,False,,0.442014,2.0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5457,e307b5e251ad4f1a60b8ed60,False,,,,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5458,44397f33f964a5201e321fe3,True,7.9,0.922506,3.0,150.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5459,5a220fca178a2a17a84893ab,False,6.8,0.946256,1.0,43.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Percentage of Values Present (Not Missing)
Below we show the percentage of values that are present (**NOT** missing) per column:

In [None]:
# Percentage of values not missing in each column

venues = pd.read_csv('foursquare_venues 14-Jul-23.csv', converters={"categories": literal_eval})

drop_list = ['address', 'postcode']
venues = venues.drop(drop_list, axis=1)

venues.notnull().sum() * 100 / len(venues)


### Merge Extended Dataframe Data with Venue Search Data
Below we merge the data from the venue search with the data from the extended data collection: 

In [60]:
# Merge venues df with venue_details df
full_venue_data = pd.merge(venues, venue_categories_df, how="left", on='fsq_id', 
                           sort=False, copy=True, validate='one_to_one')

columns_to_fill = ['verified','rating','popularity','price']
full_venue_data[columns_to_fill] = full_venue_data[columns_to_fill].fillna(100)

full_venue_data = full_venue_data.fillna(0)

full_venue_data.head()

Unnamed: 0,fsq_id,venue_name,latitude,longitude,verified,rating,popularity,price,total_ratings,African Restaurant,Agriculture and Forestry Service,American Restaurant,Amusement Park,Arcade,Argentinian Restaurant,Art Museum,Arts and Entertainment,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Bathing Area,Bavarian Restaurant,Bay,Beach,Beer Bar,Beer Garden,Belgian Restaurant,Bicycle Store,Bike Trail,Bistro,Botanical Garden,Bowling Alley,Brasserie,Brazilian Restaurant,Breakfast Spot,Brewery,Bridge,Bubble Tea Shop,Buffet,Burger Joint,Burrito Restaurant,"Cafe, Coffee, and Tea House",Cafeteria,Café,Cajun and Creole Restaurant,Cambodian Restaurant,Campground,Cantonese Restaurant,Caribbean Restaurant,Casino,Cha Chaan Teng,Chinese Restaurant,Cidery,Cocktail Bar,Coffee Shop,College and University,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Creperie,Cuban Restaurant,Cupcake Shop,Deli,Dessert Shop,Dim Sum Restaurant,Diner,Dining and Drinking,Distillery,Dive Bar,Dive Spot,Dog Park,Donut Shop,Drive-in Theater,Dumpling Restaurant,Dutch Restaurant,Eastern European Restaurant,Egyptian Restaurant,Empanada Restaurant,English Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Fast Food Restaurant,Filipino Restaurant,Fish Market,Fish and Chips Shop,Food Court,Food Stand,Food Truck,Food and Beverage Retail,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Garden,Gastropub,Gay Bar,Gelato Shop,German Restaurant,Greek Restaurant,Grocery Store,Halal Restaurant,Harbor or Marina,Hawaiian Restaurant,Hiking Trail,Historic and Protected Site,History Museum,Hong Kong Restaurant,Hookah Bar,Hot Dog Joint,Hot Spring,Hotel Bar,Hotpot Restaurant,Hungarian Restaurant,Ice Cream Parlor,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Iraqi Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Korean BBQ Restaurant,Korean Restaurant,Kosher Restaurant,Lake,Landmarks and Outdoors,Latin American Restaurant,Lebanese Restaurant,Lighthouse,Lounge,Malay Restaurant,Meadery,Meat and Seafood Store,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Mongolian Restaurant,Monument,Moroccan Restaurant,Mountain,Movie Theater,Museum,Music Venue,National Park,Nature Preserve,New American Restaurant,Night Club,Night Market,Noodle Restaurant,North Indian Restaurant,Other Great Outdoors,Pakistani Restaurant,Park,Pastry Shop,Pedestrian Plaza,Peking Duck Restaurant,Peruvian Restaurant,Pet Café,Piano Bar,Picnic Area,Pie Shop,Pizzeria,Playground,Plaza,Poke Restaurant,Polish Restaurant,Pool Hall,Portuguese Restaurant,Poutine Restaurant,Pub,Puerto Rican Restaurant,RV Park,Ramen Restaurant,Restaurant,River,Rock Climbing Spot,Rock Club,Roof Deck,Rooftop Bar,Russian Restaurant,Sake Bar,Salad Restaurant,Salvadoran Restaurant,Sandwich Spot,Scandinavian Restaurant,Scenic Lookout,Science Museum,Scottish Restaurant,Sculpture Garden,Seafood Restaurant,Shanghai Restaurant,Shawarma Restaurant,Smoothie Shop,Soup Spot,South American Restaurant,Southern Food Restaurant,Spa,Spanish Restaurant,Speakeasy,Sports Bar,Sri Lankan Restaurant,State or Provincial Park,Steakhouse,Street Food Gathering,Sushi Restaurant,Szechuan Restaurant,Taco Restaurant,Taiwanese Restaurant,Tapas Restaurant,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theater,Tiki Bar,Travel and Transportation,Turkish Restaurant,Udon Restaurant,Urban Park,Vegan and Vegetarian Restaurant,Venezuelan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Winery,Wings Joint
0,596bfbe6920540752e4f4534,Romio's Pizza Pub,47.612972,-122.329457,False,100.0,100.0,100.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,4e841231d3e393b3e134aa42,The Pillars,47.613727,-122.328873,False,100.0,0.897207,100.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,5df29f0e059a360009132dfc,Zaika,47.613821,-122.3288,False,6.9,0.980501,2.0,12.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,56995b4b498e30d566717b04,Daawat Indian Grill,47.612652,-122.331429,False,7.7,0.989678,2.0,69.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,84f2b446bdc54b7b0d24a581,Chutneys Queen Anne,47.613821,-122.3288,False,100.0,100.0,100.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
