# Car ride-share potential in mid-size U.S. cities from geographic spread (2)

Second notebook for the IBM Data Science Specialization on Coursera, consolidating and cleaning up the first notebook, to continue with less clutter.

## Copy-forward: All imports so far

In [2]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import re
import math
import json
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
!conda install -c conda-forge folium=0.5.0 --yes
import folium

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.18.1-py_0 conda-forge

geographiclib- 100% |################################| Time: 0:00:00  24.55 MB/s
geopy-1.18.1-p 100% |################################| Time: 0:00:00  38.25 MB/s
Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  56.51 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  36.05 MB/s
vincent-0.4.4- 100% |###################

## Copy-forward: Mid-size U.S. cities

In [4]:
# Pinned revision (oldid=...) of the Wikipedia article, so the table layout
# does not change between runs.
url = 'https://en.wikipedia.org/w/index.php?title=List_of_United_States_cities_by_population&oldid=883568308'

# Fail loudly on network/HTTP problems instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
website_url = response.text  # NOTE: despite the name, this holds the page's HTML text

soup = BeautifulSoup(website_url, 'lxml')
city_table = soup.find('table', { 'class' : 'wikitable sortable' })
if city_table is None:
    raise ValueError('no "wikitable sortable" table found at {}'.format(url))

# Show the first and last 500 characters of the table markup as a sanity check.
print("{}\n\n   [...]\n\n{}".format(str(city_table)[:500].replace('\n', '').replace('<tr>', '\n\n<tr>'), str(city_table)[-500:]))

<table class="wikitable sortable" style="text-align:center"><tbody>

<tr><th>2017<br/>rank</th><th>City</th><th>State<sup class="reference" id="cite_ref-5"><a href="#cite_note-5">[5]</a></sup></th><th>2017<br/>estimate</th><th>2010<br/>Census</th><th>Change</th><th colspan="2">2016 land area</th><th colspan="2">2016 population density</th><th>Location</th></tr>

<tr><td>1</td><td style="text-align:left;background-color:#cfecec"><i><a href="/wiki/New_York_City" title="New York 

   [...]

"latitude">38°21′14″N</span> <span class="longitude">121°58′22″W</span></span></span><span class="geo-multi-punct">﻿ / ﻿</span><span class="geo-default"><span class="vcard"><span class="geo-dec" title="Maps, aerial photos, and other data for this location">38.3539°N 121.9728°W</span><span style="display:none">﻿ / <span class="geo">38.3539; -121.9728</span></span><span style="display:none">﻿ (<span class="fn org">Vacaville</span>)</span></span></span></a></span></small>
</td></tr></tbody></table>


In [5]:
# Keep only "mid-size" cities: 2017 population estimate within these bounds (inclusive).
MIN_POPULATION = 300000
MAX_POPULATION = 400000

l = []

table_rows = city_table.find_all('tr')
for tr in table_rows:
    td = tr.find_all('td')
    row = [cell.text.strip() for cell in td]
    if len(row) < 1:
        # rows without any <td> cell (e.g. the header row, which only has <th>)
        print("(ignoring empty row)")
        test_size = 0
    else:
        # column 3 is the 2017 population estimate, formatted with thousands separators
        test_size = int(row[3].replace(',', ''))

    if MIN_POPULATION <= test_size <= MAX_POPULATION:
        # column 1: city name, possibly followed by footnote markers like "[d]"
        city_name = re.sub(r'\[.*\]', '', row[1])
        city_state = row[2]
        city_estd_pop2017 = test_size
        # column 10 holds the location in several notations separated by '/';
        # drop the trailing "(City)" part, keep what follows the last '/'
        # (the decimal "lat; long" form) and remove all spaces
        city_latlongraw = re.sub(r'^.*/', '', re.sub(r'\(.*\)', '', row[10])).replace(' ', '')
        # strip non-ASCII residue
        city_latlongraw = city_latlongraw.encode('ascii',errors='ignore').decode()
        city_lat = float(re.sub(r';.*$', '', city_latlongraw))
        city_long = float(re.sub(r'^.*;', '', city_latlongraw))
        l.append([city_name, city_state, city_estd_pop2017, city_lat, city_long])

# Passing the column names to the constructor also works when no city matched.
cities_df = pd.DataFrame(l, columns=['City name', 'City state', 'Population', 'Latitude', 'Longitude'])
print(cities_df)

(ignoring empty row)
         City name    City state  Population  Latitude  Longitude
0        Arlington         Texas      396394   32.7007   -97.1247
1      New Orleans     Louisiana      393292   30.0534   -89.9345
2          Wichita        Kansas      390591   37.6907   -97.3459
3        Cleveland          Ohio      385525   41.4785   -81.6794
4            Tampa       Florida      385430   27.9701   -82.4797
5      Bakersfield    California      380874   35.3212  -119.0183
6           Aurora      Colorado      366623   39.6880  -104.6897
7          Anaheim    California      352497   33.8555  -117.7601
8         Honolulu        Hawaii      350395   21.3243  -157.8476
9        Santa Ana    California      334136   33.7363  -117.8830
10       Riverside    California      327728   33.9381  -117.3932
11  Corpus Christi         Texas      325605   27.7543   -97.1734
12       Lexington      Kentucky      321959   38.0407   -84.4583
13        Stockton    California      310496   37.9763 

## Copy-forward: Foursquare functions

<!--
Foursquare credentials are intentionally not stored in this notebook.
Define CLIENT_ID and CLIENT_SECRET in a private, uncommitted cell (or read them from environment variables) before running the cells below.
-->

In [8]:
# Confirm that the Foursquare credentials are available without echoing their values.
# NOTE(review): neither name is assigned in the code cells shown here; they must
# already exist in the kernel, otherwise these lines raise NameError rather than
# printing False.
print('CLIENT_ID set: {}'.format(CLIENT_ID is not None))
print('CLIENT_SECRET set: {}'.format(CLIENT_SECRET is not None))

VERSION = '20180605' # Foursquare API version

CLIENT_ID set: True
CLIENT_SECRET set: True


### getVenuesNearLatLong

In [79]:
def getVenuesNearLatLong(latitude, longitude, radius=500, limit=100, verbose=True):
    '''
    Asks the Foursquare "venues/explore" endpoint for venues around one point.

    Parameters:

    latitude and longitude are the search center in decimal degrees,
    radius is the radius [meters] to search around that point,
    limit is the maximum number of venues to request,
    verbose prints a full progress line if True, only the venue count otherwise.

    Returns a DataFrame with one row per venue and the columns 'Latitude',
    'Longitude', 'Venue' and 'Venue Category'. Latitude/Longitude are the
    coordinates of the *search center*, not of the individual venue. A venue
    without any category gets None as its category. If the request fails or
    the response does not have the expected structure, '(err)' is printed and
    an empty DataFrame (with the same columns) is returned.
    '''

    columns = ['Latitude', 'Longitude', 'Venue', 'Venue Category']

    # let requests build and encode the query string
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(latitude, longitude),
        'radius': radius,
        'limit': limit,
    }

    # best effort: a failed or malformed response counts as "no venues found"
    try:
        results_raw = requests.get('https://api.foursquare.com/v2/venues/explore', params=params, timeout=30)
        results = results_raw.json()["response"]['groups'][0]['items']
    except (requests.exceptions.RequestException, ValueError, KeyError, IndexError, TypeError):
        print('(err)', end='')
        results = []

    # keep only the relevant information for each nearby venue
    rows = []
    for item in results:
        venue = item['venue']
        categories = venue.get('categories') or []
        category = categories[0]['name'] if categories else None
        rows.append((latitude, longitude, venue['name'], category))

    # naming the columns here keeps the schema stable even for zero venues
    nearby_venues = pd.DataFrame(rows, columns=columns)

    if verbose:
        print('found {} venues within {} meters of {}/{}'.format(len(results), radius, latitude, longitude))
    else:
        print('{}'.format(len(results)), end='. ')

    return nearby_venues

### get_venues_in_hex_grid

In [80]:
def get_venues_in_hex_grid(latitude, longitude, venues_dict, this_coord, radius=500, limit=100, new_coords=None, verbose=True):
    '''
    Calls Foursquare in a hex grid around a given coordinate point. If venues
    have already been searched on one of the hex grid points, that result is
    kept and no new search is executed.
    
    Parameters:
    
    latitude and longitude are as of the origin coordinate (0, 0),
    venues_dict are the venues found so far (dictionary keys are a coordinate tuple);
        results of new searches are added to it in place,
    this_coord is the center coordinate around which the hex grid is to be searched,
    radius is the radius [meters] to search around a coordinate point,
    limit is the maximum number of venues to return from a Foursquare search,
    new_coords is a list of coordinate points that wasn't probed yet; it is
        modified in place (this_coord is removed, newly searched points are
        appended). If omitted, a fresh empty list is used for each call.
    verbose is passed on to getVenuesNearLatLong and selects the progress output.
    
    Returns new_coords, i.e. the list of coordinate tuples still to be probed
    '''
    
    # A mutable default ([]) would be shared between calls and leak
    # coordinates from one exploration into the next.
    if new_coords is None:
        new_coords = []
    
    r_earth = 6378000. # approximate radius of the Earth in meters
    pi = math.pi
    sqrt_three = math.sqrt(3.)
    overlap = 1.4 # grid spacing is radius * overlap / 2, so neighboring search circles overlap
    
    cx = this_coord[0] # center X
    cy = this_coord[1] # center Y
    hex_coords = [ (cx-1,cy), (cx+1, cy), (cx,cy-1), (cx,cy+1), (cx-1,cy+1), (cx+1,cy-1) ] # the hex grid around this_coord
    
    # the center is being probed now, so it is no longer "new"
    if (cx, cy) in new_coords:
        new_coords.remove((cx, cy))
    
    for this_hex in hex_coords:
        if this_hex in venues_dict:
            # the coordinate has already been searched for
            continue
        
        # get the x- and y-step from a hex grid; start with a square grid (letting the circles overlap a bit):
        dx_square = this_hex[0] * radius * ( overlap / 2. )
        dy_square = this_hex[1] * radius * ( overlap / 2. )
        # now convert to a hex grid:
        dx = dx_square + dy_square / 2.
        dy = dy_square * ( sqrt_three / 2. )
        # approximate the center point's latitude and longitude assuming locally flat Earth
        hex_latitude  = latitude  + (dy / r_earth) * (180 / pi)
        hex_longitude = longitude + (dx / r_earth) * (180 / pi) / math.cos(latitude * pi/180)
        
        if verbose:
            print('getting coordinate {}...'.format(this_hex))
        else:
            print('({},{}):'.format(this_hex[0], this_hex[1]), end='')
        
        this_venues = getVenuesNearLatLong(hex_latitude, hex_longitude, radius=radius, limit=limit, verbose=verbose)
        venues_dict[this_hex] = this_venues
        if this_hex not in new_coords:
            new_coords.append(this_hex)
    
    return new_coords

### make_map_from_dict

In [81]:
def make_map_from_dict(lat_orig, long_orig, venues_dict, zoom_start, city_name='(city_name)', city_state='(city_state)', radius_zoom=1.0):
    '''
    Draws the explored grid points on a folium map, one circle per grid point.

    Parameters:

    lat_orig and long_orig are the map center,
    venues_dict maps a grid coordinate tuple to the DataFrame of venues found there,
    zoom_start is the initial zoom level of the map,
    city_name and city_state are shown in a legend box at the top right,
    radius_zoom scales the circle radius (radius = number of venues * radius_zoom).

    Grid points without venues are not drawn. Returns the folium map.
    '''
    city_map = folium.Map(location=[lat_orig, long_orig], zoom_start=zoom_start)

    # one circle marker per grid point that has at least one venue
    for grid_coord, venue_frame in venues_dict.items():
        venue_count = venue_frame.shape[0]
        if venue_count <= 0:
            continue

        popup = folium.Popup('{}, # venues: {}'.format(grid_coord, venue_count), parse_html=True)
        # every row of a frame carries the same search-center position, so row 0 is used
        position = [venue_frame['Latitude'][0], venue_frame['Longitude'][0]]
        marker = folium.CircleMarker(
            position,
            radius=venue_count * radius_zoom,
            popup=popup,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False)
        marker.add_to(city_map)

    # fixed-position legend box naming the city
    legend_template = (
        '<div style="position: fixed; top: 30px; right: 50px; width: 450px;'
        'height: 30px; border: 2px solid grey; z-index: 9999; font-size: 16px; background-color: white">'
        '&nbsp;{},&nbsp;{}'
        '</div>')
    legend_html = legend_template.format(city_name, city_state)
    city_map.get_root().html.add_child(folium.Element(legend_html))

    return city_map

### find_venues_geo_distribution

In [82]:
def find_venues_geo_distribution(cities_df, city_index, max_coords_tested=100, radius=1500, limit=100, verbose=True):
    '''
    Explores the venues of one city on a hex grid, starting at the city's
    coordinates and always expanding the not-yet-expanded grid point that has
    the most venues.

    Parameters:

    cities_df is a DataFrame with the columns 'City name', 'City state',
        'Latitude' and 'Longitude',
    city_index is the index label of the city's row in cities_df,
    max_coords_tested is the maximum number of exploration rounds; the search
        at the origin counts as round #1, every hex grid expansion as one more,
    radius is the radius [meters] to search around each grid point,
    limit is the maximum number of venues to return from each Foursquare search,
    verbose selects detailed (True) or compact (False) progress output.

    Returns the tuple (lat_orig, long_orig, venues_dict, city_name, city_state),
    where venues_dict maps each searched grid coordinate tuple to its venues DataFrame.
    '''
    
    city_name = cities_df['City name'][city_index]
    city_state = cities_df['City state'][city_index]
    lat_orig = cities_df.Latitude[city_index]
    long_orig = cities_df.Longitude[city_index]
    
    # initialize venues_dict with the venues dataframe at the origin;
    # limit is forwarded so the origin is searched like every other grid point
    origin_coord = (0,0)
    print('[test #1 of {}] (0,0):'.format(max_coords_tested), end='')
    venues_dict = {
        origin_coord: getVenuesNearLatLong(lat_orig, long_orig, radius=radius, limit=limit, verbose=verbose)
    }
    
    # mark the origin as the first (and only) coordinate point not yet explored
    new_coords = [origin_coord]
    num_coords_tested = 1
    
    while num_coords_tested < max_coords_tested:
        
        if not new_coords:
            # nothing left to expand
            break
        
        # pick the unexplored point with the most venues (the first one wins a tie)
        new_test_coord = max(new_coords, key=lambda coord: venues_dict[coord].shape[0])
        
        # call the hex grid exploration function
        num_coords_tested += 1
        print('[test #{} of {}]'.format(num_coords_tested, max_coords_tested), end=' ')
        new_coords = get_venues_in_hex_grid(lat_orig, long_orig, venues_dict, new_test_coord, radius=radius, new_coords=new_coords, limit=limit, verbose=verbose )
        
    return lat_orig, long_orig, venues_dict, city_name, city_state

## Copy-forward: Quick validation tests (Arlington and New Orleans)

In [83]:
# Quick validation run for the city in row 0 of cities_df: 1500 m search radius, 20 exploration rounds, compact output.
lat_orig, long_orig, venues_dict, city_name, city_state = find_venues_geo_distribution(cities_df, 0, radius=1500, max_coords_tested=20, verbose=False)

[test #1 of 20] (0,0):58. [test #2 of 20] (-1,0):64. (1,0):72. (0,-1):96. (0,1):63. (-1,1):50. (1,-1):84. [test #3 of 20] (-1,-1):47. (0,-2):100. (1,-2):94. [test #4 of 20] (-1,-2):42. (0,-3):100. (1,-3):100. [test #5 of 20] (-1,-3):33. (0,-4):98. (1,-4):100. [test #6 of 20] (2,-3):100. (2,-4):100. [test #7 of 20] (1,-5):94. (2,-5):71. [test #8 of 20] (3,-3):97. (2,-2):100. (3,-4):73. [test #9 of 20] (3,-5):27. [test #10 of 20] (3,-2):74. (2,-1):60. [test #11 of 20] (-1,-4):25. (0,-5):11. [test #12 of 20] (4,-3):76. (4,-4):74. [test #13 of 20] [test #14 of 20] (1,-6):46. (2,-6):73. [test #15 of 20] [test #16 of 20] (5,-3):28. (4,-2):46. (5,-4):23. [test #17 of 20] (3,-1):15. [test #18 of 20] (4,-5):20. (5,-5):15. [test #19 of 20] [test #20 of 20] (3,-6):78. (2,-7):44. (3,-7):41. 

In [84]:
# Map the result of the previous run at zoom level 13; each circle's radius is 0.2 x the venue count at that grid point.
make_map_from_dict(lat_orig, long_orig, venues_dict, 13, city_name, city_state, radius_zoom=0.2)

In [119]:
# Same validation run for the city in row 1 of cities_df; this overwrites the results of the previous city.
lat_orig, long_orig, venues_dict, city_name, city_state = find_venues_geo_distribution(cities_df, 1, radius=1500, max_coords_tested=20, verbose=False)

[test #1 of 20] (0,0):4. [test #2 of 20] (-1,0):5. (1,0):3. (0,-1):9. (0,1):2. (-1,1):6. (1,-1):5. [test #3 of 20] (-1,-1):31. (0,-2):18. (1,-2):7. [test #4 of 20] (-2,-1):33. (-1,-2):29. (-2,0):24. [test #5 of 20] (-3,-1):36. (-2,-2):40. (-3,0):20. [test #6 of 20] (-3,-2):19. (-2,-3):26. (-1,-3):22. [test #7 of 20] (-4,-1):19. (-4,0):8. [test #8 of 20] (0,-3):9. [test #9 of 20] (-3,-3):24. (-2,-4):16. (-1,-4):14. [test #10 of 20] (-2,1):3. (-3,1):8. [test #11 of 20] (-4,-3):10. (-3,-4):16. (-4,-2):25. [test #12 of 20] (-5,-2):19. (-5,-1):8. [test #13 of 20] (0,-4):11. [test #14 of 20] (-4,1):6. [test #15 of 20] [test #16 of 20] (-5,0):7. [test #17 of 20] (-6,-2):12. (-5,-3):18. (-6,-1):8. [test #18 of 20] (1,-3):8. [test #19 of 20] (-6,-3):17. (-5,-4):16. (-4,-4):18. [test #20 of 20] (-4,-5):12. (-3,-5):9. 

In [120]:
# Map the result of the previous run at zoom level 13; each circle's radius is 0.2 x the venue count at that grid point.
make_map_from_dict(lat_orig, long_orig, venues_dict, 13, city_name, city_state, radius_zoom=0.2)

## Determine aggregates describing the geographical distribution

In order to cluster cities by shape of their venues, we need to calculate aggregate variables:

* Determine the center of the venues by averaging each coordinate point, weighted by the number of venues there.
* Determine the mean distance of all venues from that center.
* Determine the standard deviation of the distance distribution, as well as skewness and kurtosis ("peaky-ness"). Once we have these values, all details on the geographic venues distribution will be discarded.

But first we have to simplify the dataframe, to sum up the number of indicator venues as compared to the total number of venues.

### Check whether a venue is of an indicator type

Define a venue indicator function now so that the remainder of the algorithm can be developed and tested. As cities are tested for venues, I'll keep updating this function. What you see is the finished function after all the incremental updates along the way.

In [121]:
# Keywords that mark a venue category as an "indicator" venue.
_INDICATOR_WORDS = (
    'yoga',
    'salad',
    'coworking',
    'alternative',
    'bike',
    'gym',
    'fitness',
    'running',
    'jogging',
    'cycling',
    'cycle',
    'athletics',
    'gluten',
    'health',
    'recreation',
    'tennis',
    'vegetarian',
    'vegan',
    'art',
    'sports',
    'disc golf',
    'pilates',
    'share',
    'sharing',
)

# A keyword only counts at the start of a word: 'art' matches "Art Gallery" and
# "Arts & Crafts Store" but not "Department Store"; 'cycle' matches
# "Cycle Studio" but not "Motorcycle Shop".
_INDICATOR_PATTERN = re.compile(
    r'\b(?:' + '|'.join(re.escape(word) for word in _INDICATOR_WORDS) + r')',
    re.IGNORECASE)

def is_indicator_venue(venue_type):
    '''
    Tells whether a venue category name is of an indicator type.

    Parameters:

    venue_type is the category name, e.g. 'Gym / Fitness Center'; anything
        that is not a string (None, NaN, ...) is never an indicator.

    Returns True if any word of venue_type starts with one of the indicator
    keywords (case-insensitive), False otherwise.
    '''
    if not isinstance(venue_type, str):
        return False
    
    return _INDICATOR_PATTERN.search(venue_type) is not None

# Smoke test: only 'Salad Bar' and 'Coworking space' contain an indicator keyword,
# so the expected output is False, False, True, False, True.
print('Perform some tests on indicator venues:')
print(is_indicator_venue(None))
print(is_indicator_venue(''))
print(is_indicator_venue('Salad Bar'))
print(is_indicator_venue('Chinese Restaurant'))
print(is_indicator_venue('Coworking space'))

Perform some tests on indicator venues:
False
False
True
False
True


### Aggregate venues_dict into a DataFrame with one row per coordinate

For all coordinates, count the number of venues at that coordinate as well as the number of indicator venues

In [122]:
def aggregate_venues_dict(venues_dict):
    '''
    Condenses venues_dict into one row per grid coordinate.

    Parameters:

    venues_dict maps a grid coordinate tuple to the DataFrame of venues found
        there; non-empty DataFrames must have a 'Venue Category' column.

    Returns a tuple (venues_agg_df, venues_types):
    venues_agg_df has the columns 'coord', 'num_venues' and 'num_indicators'
        (number of venues for which is_indicator_venue is True),
    venues_types is the set of all venue categories seen.
    An empty venues_dict yields an empty DataFrame with the same columns.
    '''
    venues_agg = []
    venues_types = set()
    for coord, venues in venues_dict.items():
        num_venues = venues.shape[0]
        num_indicators = 0
        # a DataFrame without venues may lack the 'Venue Category' column entirely
        if num_venues > 0:
            categories = list(venues['Venue Category'].values)
            venues_types.update(categories)
            num_indicators = sum(1 for this_type in categories if is_indicator_venue(this_type))
        venues_agg.append([coord, num_venues, num_indicators])
    
    # naming the columns in the constructor also works for zero rows
    venues_agg_df = pd.DataFrame(venues_agg, columns=['coord', 'num_venues', 'num_indicators'])
    return venues_agg_df, venues_types

Do a quick test

In [123]:
# Aggregate whatever venues_dict currently holds and show the first ten grid points.
venues_agg_df, venues_types = aggregate_venues_dict(venues_dict)
venues_agg_df.head(10)

Unnamed: 0,coord,num_venues,num_indicators
0,"(-1, 0)",5,0
1,"(-3, -1)",36,0
2,"(-1, -3)",22,1
3,"(-6, -2)",12,0
4,"(1, -1)",5,0
5,"(-2, 0)",24,1
6,"(-2, 1)",3,0
7,"(-3, -3)",24,1
8,"(-2, -4)",16,0
9,"(1, -2)",7,0


Confirm the result by looking at a coordinate. Pick (-1, -3) since we have an indicator venue there:

In [124]:
# List every venue category at grid point (-1, -3) together with its indicator flag,
# to cross-check the num_indicators value reported for that point.
for this_type in venues_dict[(-1, -3)]['Venue Category'].values:
    print('{}:  {}'.format(is_indicator_venue(this_type), this_type))

False:  Park
False:  Pharmacy
False:  Fried Chicken Joint
False:  Discount Store
False:  Bank
False:  Pizza Place
False:  Bank
True:  Gym / Fitness Center
False:  Sandwich Place
False:  Rental Car Location
False:  Sandwich Place
False:  Breakfast Spot
False:  Southern / Soul Food Restaurant
False:  Grocery Store
False:  Seafood Restaurant
False:  Pharmacy
False:  American Restaurant
False:  Seafood Restaurant
False:  Hotel
False:  Restaurant
False:  Construction & Landscaping
False:  Hardware Store


Works. Let's scan some cities for venue types, and update the indicator function above.

In [91]:
# Large scan of the city in row 0: 800 m search radius, up to 1000 exploration rounds.
# Each newly probed grid point triggers one Foursquare request, so this cell is slow.
lat_orig, long_orig, venues_dict, city_name, city_state = find_venues_geo_distribution(cities_df, 0, radius=800, max_coords_tested=1000, verbose=False)

[test #1 of 1000] (0,0):26. [test #2 of 1000] (-1,0):14. (1,0):26. (0,-1):22. (0,1):37. (-1,1):24. (1,-1):22. [test #3 of 1000] (1,1):40. (0,2):35. (-1,2):25. [test #4 of 1000] (2,1):25. (1,2):35. (2,0):18. [test #5 of 1000] (0,3):27. (-1,3):17. [test #6 of 1000] (2,2):19. (1,3):25. [test #7 of 1000] (0,4):40. (-1,4):25. [test #8 of 1000] (1,4):14. (0,5):31. (-1,5):31. [test #9 of 1000] (1,5):8. (0,6):20. (-1,6):36. [test #10 of 1000] (-2,6):30. (-1,7):36. (-2,7):35. [test #11 of 1000] (0,7):42. (-1,8):53. (-2,8):40. [test #12 of 1000] (0,8):37. (-1,9):35. (-2,9):32. [test #13 of 1000] (1,7):26. (1,6):12. [test #14 of 1000] (-3,8):21. (-3,9):18. [test #15 of 1000] (1,8):28. (0,9):33. [test #16 of 1000] (-3,7):8. [test #17 of 1000] (-1,10):27. (-2,10):22. [test #18 of 1000] (1,9):17. (0,10):41. [test #19 of 1000] (1,10):21. (0,11):41. (-1,11):53. [test #20 of 1000] (-2,11):31. (-1,12):99. (-2,12):68. [test #21 of 1000] (0,12):58. (-1,13):59. (-2,13):76. [test #22 of 1000] (-3,13):49. (-

6. [test #217 of 1000] (7,11):5. [test #218 of 1000] (7,-8):3. [test #219 of 1000] (-6,16):4. (-7,16):6. [test #220 of 1000] (-8,15):5. (-8,16):5. [test #221 of 1000] (12,-1):12. (11,-2):20. (12,-2):19. [test #222 of 1000] (10,-2):9. (11,-3):11. (12,-3):17. [test #223 of 1000] (13,-2):8. (13,-3):8. [test #224 of 1000] (-10,8):6. (-10,9):8. [test #225 of 1000] (-3,20):2. (-4,21):5. (-5,21):6. [test #226 of 1000] [test #227 of 1000] [test #228 of 1000] [test #229 of 1000] (-2,-4):10. [test #230 of 1000] (2,-12):7. [test #231 of 1000] (8,-8):1. (9,-8):11. [test #232 of 1000] (5,9):11. [test #233 of 1000] (14,2):5. [test #234 of 1000] (9,-1):5. [test #235 of 1000] (10,-7):15. (10,-8):9. [test #236 of 1000] (-2,18):10. [test #237 of 1000] (-6,19):6. (-6,20):11. [test #238 of 1000] (-6,21):5. [test #239 of 1000] (12,-4):4. (13,-4):45. [test #240 of 1000] (14,-4):50. (13,-5):32. (14,-5):53. [test #241 of 1000] (15,-5):69. (14,-6):43. (15,-6):71. [test #242 of 1000] (16,-6):65. (15,-7):8. (16,

5. [test #441 of 1000] (15,-27):1. (14,-26):2. [test #442 of 1000] [test #443 of 1000] (10,-28):6. [test #444 of 1000] (11,-29):4. [test #445 of 1000] (13,-25):4. [test #446 of 1000] (16,-30):14. (16,-31):12. [test #447 of 1000] (17,-30):11. (16,-29):15. (17,-31):9. [test #448 of 1000] (17,-29):6. (16,-28):5. [test #449 of 1000] [test #450 of 1000] [test #451 of 1000] (11,-30):7. [test #452 of 1000] (11,-31):2. (12,-32):13. [test #453 of 1000] (11,-32):4. (12,-33):4. (13,-33):6. [test #454 of 1000] (16,-32):9. [test #455 of 1000] (17,-32):6. [test #456 of 1000] [test #457 of 1000] (16,-33):2. [test #458 of 1000] (18,-30):5. (18,-31):6. [test #459 of 1000] (4,8):10. [test #460 of 1000] (10,-16):12. [test #461 of 1000] (11,-16):3. (11,-17):8. [test #462 of 1000] (16,-23):5. (16,-24):11. [test #463 of 1000] (17,-24):3. (16,-25):10. (17,-25):4. [test #464 of 1000] [test #465 of 1000] (11,-24):5. [test #466 of 1000] [test #467 of 1000] [test #468 of 1000] [test #469 of 1000] [test #470 of 1

23. (13,9):14. (14,9):18. [test #675 of 1000] (15,10):17. (14,11):19. (15,9):19. [test #676 of 1000] (15,11):28. (14,12):7. (13,12):5. [test #677 of 1000] (16,11):34. (15,12):17. (16,10):26. [test #678 of 1000] (17,11):17. (16,12):21. (17,10):18. [test #679 of 1000] (16,9):19. (17,9):17. [test #680 of 1000] (17,12):6. (16,13):13. (15,13):16. [test #681 of 1000] (15,8):11. (16,8):3. [test #682 of 1000] (17,8):5. [test #683 of 1000] (14,8):11. [test #684 of 1000] (18,10):13. (18,9):13. [test #685 of 1000] [test #686 of 1000] (14,13):18. [test #687 of 1000] (13,13):9. (14,14):16. (13,14):7. [test #688 of 1000] (18,11):11. [test #689 of 1000] (18,8):5. [test #690 of 1000] (15,14):9. [test #691 of 1000] (14,15):6. (13,15):11. [test #692 of 1000] (12,9):3. (13,8):9. [test #693 of 1000] (17,13):1. (16,14):6. [test #694 of 1000] (19,10):8. (19,9):1. [test #695 of 1000] (19,8):4. [test #696 of 1000] (15,7):8. (16,7):7. [test #697 of 1000] (14,7):5. [test #698 of 1000] (19,11):9. (18,12):8. [tes

7. (-4,-10):8. [test #895 of 1000] (-3,-9):16. (-4,-8):5. (-3,-10):16. [test #896 of 1000] (-2,-9):10. (-3,-8):5. (-2,-10):15. [test #897 of 1000] (-3,-11):5. (-2,-11):13. [test #898 of 1000] (-1,-10):10. (-1,-11):8. [test #899 of 1000] (-2,-12):5. (-1,-12):4. [test #900 of 1000] (-1,-9):5. (-2,-8):2. [test #901 of 1000] (0,-10):6. [test #902 of 1000] (-4,-11):7. [test #903 of 1000] [test #904 of 1000] (-5,-11):9. [test #905 of 1000] (-6,-11):9. (-5,-12):21. (-4,-12):19. [test #906 of 1000] (-6,-12):5. (-5,-13):14. (-4,-13):19. [test #907 of 1000] (-3,-12):12. (-3,-13):18. [test #908 of 1000] (-4,-14):10. (-3,-14):14. [test #909 of 1000] (-2,-13):7. (-2,-14):3. [test #910 of 1000] (-6,-13):4. (-5,-14):1. [test #911 of 1000] (-3,-15):2. (-2,-15):1. [test #912 of 1000] [test #913 of 1000] (-4,-15):2. [test #914 of 1000] (-7,-11):7. [test #915 of 1000] [test #916 of 1000] (-1,-13):5. (-1,-14):5. [test #917 of 1000] (-8,-11):5. (-7,-12):4. [test #918 of 1000] (0,-9):1. [test #919 of 1000] 

In [93]:
# Display the distinct venue categories found in the scan, as input for
# choosing the keywords used by is_indicator_venue.
venues_agg_df, venues_types = aggregate_venues_dict(venues_dict)
venues_types

{'ATM',
 'Accessories Store',
 'Adult Boutique',
 'Airport',
 'Airport Terminal',
 'Alternative Healer',
 'American Restaurant',
 'Antique Shop',
 'Arcade',
 'Art Gallery',
 'Arts & Crafts Store',
 'Asian Restaurant',
 'Assisted Living',
 'Athletics & Sports',
 'Auto Garage',
 'Auto Workshop',
 'Automotive Shop',
 'BBQ Joint',
 'Bagel Shop',
 'Bakery',
 'Bank',
 'Bar',
 'Baseball Field',
 'Baseball Stadium',
 'Basketball Court',
 'Basketball Stadium',
 'Beach',
 'Bed & Breakfast',
 'Beer Bar',
 'Beer Garden',
 'Big Box Store',
 'Bike Shop',
 'Board Shop',
 'Boat or Ferry',
 'Bookstore',
 'Boutique',
 'Bowling Alley',
 'Boxing Gym',
 'Brazilian Restaurant',
 'Breakfast Spot',
 'Brewery',
 'Bridal Shop',
 'Bubble Tea Shop',
 'Buffet',
 'Building',
 'Burger Joint',
 'Burrito Place',
 'Bus Station',
 'Bus Stop',
 'Business Service',
 'Cafeteria',
 'Café',
 'Cajun / Creole Restaurant',
 'Camera Store',
 'Campground',
 'Candy Store',
 'Caribbean Restaurant',
 'Carpet Store',
 'Casino',
 'Che

In [101]:
# Large scan of the city in row 1: 800 m search radius, up to 1000 exploration rounds.
# Each newly probed grid point triggers one Foursquare request, so this cell is slow.
lat_orig, long_orig, venues_dict, city_name, city_state = find_venues_geo_distribution(cities_df, 1, radius=800, max_coords_tested=1000, verbose=False)

[test #1 of 1000] (0,0):2. [test #2 of 1000] (-1,0):4. (1,0):4. (0,-1):3. (0,1):3. (-1,1):2. (1,-1):3. [test #3 of 1000] (-2,0):5. (-1,-1):2. (-2,1):5. [test #4 of 1000] (-3,0):4. (-2,-1):5. (-3,1):4. [test #5 of 1000] (-2,2):2. (-3,2):4. [test #6 of 1000] (-3,-1):12. (-2,-2):18. (-1,-2):2. [test #7 of 1000] (-3,-2):27. (-2,-3):24. (-1,-3):11. [test #8 of 1000] (-4,-2):22. (-3,-3):23. (-4,-1):20. [test #9 of 1000] (-2,-4):12. (-1,-4):11. [test #10 of 1000] (-4,-3):10. (-3,-4):4. [test #11 of 1000] (-5,-2):16. (-5,-1):17. [test #12 of 1000] (-4,0):8. (-5,0):10. [test #13 of 1000] (-6,-1):4. (-6,0):5. [test #14 of 1000] (-6,-2):9. (-5,-3):12. [test #15 of 1000] [test #16 of 1000] (-2,-5):4. (-1,-5):5. [test #17 of 1000] (-6,-3):13. (-5,-4):18. (-4,-4):19. [test #18 of 1000] (-4,-5):17. (-3,-5):9. [test #19 of 1000] (-6,-4):13. (-5,-5):16. [test #20 of 1000] (-4,-6):8. (-3,-6):12. [test #21 of 1000] (-6,-5):2. (-5,-6):4. [test #22 of 1000] (-7,-3):5. (-7,-2):8. [test #23 of 1000] (-7,-4):

52. [test #191 of 1000] (-17,-28):25. (-17,-27):38. [test #192 of 1000] (-14,-27):34. (-15,-26):35. (-16,-26):55. [test #193 of 1000] (-17,-26):48. (-16,-25):36. (-17,-25):27. [test #194 of 1000] [test #195 of 1000] (-18,-26):37. (-18,-25):21. [test #196 of 1000] (-18,-31):24. (-17,-32):14. (-18,-30):41. [test #197 of 1000] [test #198 of 1000] [test #199 of 1000] (-15,-23):25. [test #200 of 1000] (-7,-21):46. (-7,-22):29. [test #201 of 1000] (-6,-21):25. (-7,-20):28. (-6,-22):21. [test #202 of 1000] [test #203 of 1000] (-8,-19):15. [test #204 of 1000] (-14,-33):12. (-13,-33):2. [test #205 of 1000] [test #206 of 1000] (-12,-33):3. [test #207 of 1000] (-18,-29):26. [test #208 of 1000] (-19,-30):27. (-19,-29):27. [test #209 of 1000] [test #210 of 1000] (-8,-23):26. [test #211 of 1000] (-12,-17):19. (-13,-17):17. [test #212 of 1000] (-9,-32):10. [test #213 of 1000] (-15,-33):12. [test #214 of 1000] [test #215 of 1000] [test #216 of 1000] (-18,-27):31. [test #217 of 1000] (-22,-14):13. [tes

6. (-25,-9):5. [test #418 of 1000] (-25,-11):32. [test #419 of 1000] (-26,-11):16. (-25,-12):15. (-26,-10):17. [test #420 of 1000] (-24,-13):9. [test #421 of 1000] (-26,-9):16. [test #422 of 1000] (2,-32):24. (1,-31):35. (0,-31):12. [test #423 of 1000] (2,-31):31. (1,-30):37. (0,-30):24. [test #424 of 1000] (2,-30):27. (1,-29):31. (0,-29):28. [test #425 of 1000] (3,-31):21. (3,-32):15. [test #426 of 1000] (2,-29):14. (1,-28):8. (0,-28):8. [test #427 of 1000] (-1,-29):24. (-1,-28):18. [test #428 of 1000] (3,-30):13. [test #429 of 1000] (3,-33):13. [test #430 of 1000] [test #431 of 1000] (-2,-28):9. [test #432 of 1000] (4,-31):8. (4,-32):12. [test #433 of 1000] (-1,-27):7. (-2,-27):8. [test #434 of 1000] [test #435 of 1000] [test #436 of 1000] (-5,-20):7. (-6,-19):9. [test #437 of 1000] (-23,-19):5. [test #438 of 1000] [test #439 of 1000] [test #440 of 1000] (5,-42):7. (6,-42):5. [test #441 of 1000] (-27,-10):16. (-27,-9):14. [test #442 of 1000] (-24,-17):7. [test #443 of 1000] [test #44

16. (-37,-17):28. (-38,-17):22. [test #611 of 1000] (-39,-18):18. (-39,-17):28. [test #612 of 1000] (-36,-17):18. (-37,-16):12. (-38,-16):16. [test #613 of 1000] (-40,-17):14. (-39,-16):19. (-40,-16):15. [test #614 of 1000] (-38,-21):19. (-37,-22):17. [test #615 of 1000] (-39,-20):14. (-39,-19):8. [test #616 of 1000] [test #617 of 1000] (-33,-24):11. (-32,-25):26. (-31,-25):12. [test #618 of 1000] (-33,-25):11. (-32,-26):31. (-31,-26):27. [test #619 of 1000] (-33,-26):26. (-32,-27):21. (-31,-27):21. [test #620 of 1000] (-30,-26):12. (-30,-27):12. [test #621 of 1000] (-34,-26):13. (-33,-27):16. (-34,-25):5. [test #622 of 1000] [test #623 of 1000] (-29,-22):30. (-30,-21):24. (-29,-23):10. [test #624 of 1000] (-28,-22):24. (-29,-21):31. (-28,-23):8. [test #625 of 1000] (-28,-21):25. [test #626 of 1000] (-27,-21):15. (-27,-22):14. [test #627 of 1000] [test #628 of 1000] (-27,-23):5. [test #629 of 1000] (-30,-25):12. [test #630 of 1000] (-32,-28):10. (-31,-28):15. [test #631 of 1000] (-30,-

7. (-4,-19):2. (-5,-19):4. [test #851 of 1000] [test #852 of 1000] [test #853 of 1000] [test #854 of 1000] [test #855 of 1000] (-24,-18):13. [test #856 of 1000] (-24,-19):7. [test #857 of 1000] [test #858 of 1000] [test #859 of 1000] [test #860 of 1000] [test #861 of 1000] [test #862 of 1000] [test #863 of 1000] [test #864 of 1000] (-51,-13):5. [test #865 of 1000] [test #866 of 1000] [test #867 of 1000] [test #868 of 1000] (-28,-24):5. [test #869 of 1000] (-32,-29):3. [test #870 of 1000] (-30,-5):2. [test #871 of 1000] (-51,-2):9. (-52,-2):11. [test #872 of 1000] (-53,-2):1. (-52,-1):3. (-53,-1):0. [test #873 of 1000] (-34,-28):6. [test #874 of 1000] [test #875 of 1000] (-51,-15):8. (-51,-14):6. [test #876 of 1000] [test #877 of 1000] (-42,-18):8. (-42,-19):6. [test #878 of 1000] (-44,-20):1. (-43,-20):1. [test #879 of 1000] (-42,-20):4. [test #880 of 1000] (-43,-16):6. [test #881 of 1000] (-46,-19):8. (-45,-20):2. [test #882 of 1000] [test #883 of 1000] [test #884 of 1000] (-2,-21):15

In [102]:
# Display the distinct venue categories found in the scan, as input for
# choosing the keywords used by is_indicator_venue.
venues_agg_df, venues_types = aggregate_venues_dict(venues_dict)
venues_types

{'Accessories Store',
 'African Restaurant',
 'Airport',
 'Airport Gate',
 'Airport Lounge',
 'Airport Service',
 'Airport Terminal',
 'American Restaurant',
 'Antique Shop',
 'Aquarium',
 'Arcade',
 'Argentinian Restaurant',
 'Art Gallery',
 'Art Museum',
 'Art Studio',
 'Arts & Crafts Store',
 'Arts & Entertainment',
 'Asian Restaurant',
 'Assisted Living',
 'Athletics & Sports',
 'Auto Dealership',
 'Auto Garage',
 'Auto Workshop',
 'Automotive Shop',
 'BBQ Joint',
 'Baby Store',
 'Bagel Shop',
 'Bakery',
 'Bank',
 'Bar',
 'Baseball Field',
 'Baseball Stadium',
 'Basketball Court',
 'Basketball Stadium',
 'Bath House',
 'Beach',
 'Beach Bar',
 'Bed & Breakfast',
 'Beer Bar',
 'Beer Garden',
 'Beer Store',
 'Betting Shop',
 'Big Box Store',
 'Bike Rental / Bike Share',
 'Bike Trail',
 'Bistro',
 'Boat or Ferry',
 'Bookstore',
 'Boutique',
 'Bowling Alley',
 'Boxing Gym',
 'Brazilian Restaurant',
 'Breakfast Spot',
 'Brewery',
 'Bridal Shop',
 'Bubble Tea Shop',
 'Buffet',
 'Burger Jo

In [109]:
# Large scan of the city in row 2: 800 m search radius, up to 1000 exploration rounds.
# Each newly probed grid point triggers one Foursquare request, so this cell is slow.
lat_orig, long_orig, venues_dict, city_name, city_state = find_venues_geo_distribution(cities_df, 2, radius=800, max_coords_tested=1000, verbose=False)

[test #1 of 1000] (0,0):43. [test #2 of 1000] (-1,0):33. (1,0):46. (0,-1):58. (0,1):18. (-1,1):16. (1,-1):58. [test #3 of 1000] (-1,-1):44. (0,-2):43. (1,-2):53. [test #4 of 1000] (2,-1):78. (2,-2):55. [test #5 of 1000] (3,-1):87. (2,0):83. (3,-2):73. [test #6 of 1000] (4,-1):74. (3,0):79. (4,-2):52. [test #7 of 1000] (2,1):36. (1,1):28. [test #8 of 1000] (4,0):55. (3,1):29. [test #9 of 1000] (5,-1):24. (5,-2):24. [test #10 of 1000] (3,-3):29. (4,-3):16. [test #11 of 1000] (2,-3):28. [test #12 of 1000] (5,0):24. (4,1):5. [test #13 of 1000] (1,-3):32. [test #14 of 1000] (5,-3):9. [test #15 of 1000] [test #16 of 1000] (-2,-1):15. (-1,-2):28. (-2,0):26. [test #17 of 1000] (0,-3):25. [test #18 of 1000] (2,2):10. (1,2):22. [test #19 of 1000] (-2,1):11. [test #20 of 1000] (1,-4):12. (2,-4):11. [test #21 of 1000] (3,2):5. [test #22 of 1000] (3,-4):17. (4,-4):18. [test #23 of 1000] (0,2):16. [test #24 of 1000] [test #25 of 1000] (-2,-2):10. (-1,-3):8. [test #26 of 1000] (-3,0):8. (-3,1):7. [te

7. (7,-14):5. [test #208 of 1000] (8,-12):4. [test #209 of 1000] (1,-6):11. [test #210 of 1000] (5,-14):6. [test #211 of 1000] (7,-8):4. [test #212 of 1000] (4,-7):6. [test #213 of 1000] (1,-10):10. (2,-11):5. [test #214 of 1000] (9,-8):13. [test #215 of 1000] (8,-8):10. (8,-7):5. [test #216 of 1000] [test #217 of 1000] (-3,2):8. [test #218 of 1000] [test #219 of 1000] [test #220 of 1000] (2,3):8. [test #221 of 1000] [test #222 of 1000] [test #223 of 1000] (20,0):6. (19,1):9. [test #224 of 1000] [test #225 of 1000] (-4,3):4. [test #226 of 1000] (0,-5):4. [test #227 of 1000] [test #228 of 1000] [test #229 of 1000] (8,-6):8. [test #230 of 1000] (13,-10):11. (13,-11):10. [test #231 of 1000] (11,-12):7. (12,-12):7. [test #232 of 1000] (10,-12):5. [test #233 of 1000] [test #234 of 1000] (9,-12):3. [test #235 of 1000] (0,-6):4. [test #236 of 1000] (14,-10):5. (13,-9):8. (14,-11):4. [test #237 of 1000] [test #238 of 1000] (-3,-2):7. (-2,-3):6. [test #239 of 1000] (4,2):5. [test #240 of 1000] 

5. [test #435 of 1000] [test #436 of 1000] (-11,-1):2. [test #437 of 1000] (-4,-2):6. [test #438 of 1000] (-13,-4):16. (-12,-5):17. [test #439 of 1000] (-14,-3):15. (-14,-2):8. [test #440 of 1000] [test #441 of 1000] (-6,-6):4. [test #442 of 1000] (-10,2):10. [test #443 of 1000] (-13,-5):12. (-12,-6):6. (-11,-6):4. [test #444 of 1000] [test #445 of 1000] (-5,1):9. [test #446 of 1000] (-10,1):11. [test #447 of 1000] (-14,-4):15. [test #448 of 1000] [test #449 of 1000] (-11,4):4. (-11,5):4. [test #450 of 1000] (-12,0):21. (-13,0):26. [test #451 of 1000] (-14,0):16. (-13,1):22. (-14,1):25. [test #452 of 1000] (-15,1):18. (-14,2):18. (-15,2):7. [test #453 of 1000] (-12,1):20. (-13,2):18. [test #454 of 1000] (-11,0):10. [test #455 of 1000] (-11,1):11. (-12,2):5. [test #456 of 1000] (-16,1):27. (-15,0):20. (-16,2):14. [test #457 of 1000] (-17,1):24. (-16,0):25. (-17,2):22. [test #458 of 1000] (-17,0):23. (-16,-1):4. (-15,-1):17. [test #459 of 1000] (-18,1):23. (-18,2):7. [test #460 of 1000] 

1. (-26,11):0. [test #688 of 1000] (-14,9):6. (-15,10):0. (-16,10):1. [test #689 of 1000] (-13,8):5. [test #690 of 1000] [test #691 of 1000] (16,-13):3. [test #692 of 1000] [test #693 of 1000] (3,9):2. [test #694 of 1000] [test #695 of 1000] [test #696 of 1000] [test #697 of 1000] (-26,17):3. (-27,17):2. [test #698 of 1000] (-28,17):3. [test #699 of 1000] (1,7):4. (2,6):7. (1,8):5. [test #700 of 1000] (-20,12):4. (-21,11):5. (-20,11):5. [test #701 of 1000] [test #702 of 1000] [test #703 of 1000] [test #704 of 1000] [test #705 of 1000] [test #706 of 1000] [test #707 of 1000] [test #708 of 1000] [test #709 of 1000] [test #710 of 1000] [test #711 of 1000] [test #712 of 1000] [test #713 of 1000] [test #714 of 1000] [test #715 of 1000] [test #716 of 1000] [test #717 of 1000] [test #718 of 1000] [test #719 of 1000] [test #720 of 1000] [test #721 of 1000] [test #722 of 1000] [test #723 of 1000] [test #724 of 1000] [test #725 of 1000] [test #726 of 1000] [test #727 of 1000] (-2,-4):5. [test #7

4. [test #946 of 1000] (14,-23):5. [test #947 of 1000] (17,-23):1. (16,-24):2. (17,-24):1. [test #948 of 1000] (9,-21):5. (10,-22):3. (11,-22):4. [test #949 of 1000] (9,-22):4. [test #950 of 1000] (-2,-6):4. [test #951 of 1000] (26,1):4. (25,2):8. (26,0):4. [test #952 of 1000] (26,2):3. (25,3):4. (24,3):5. [test #953 of 1000] (26,-1):5. [test #954 of 1000] (20,-11):1. (21,-12):2. (22,-12):4. [test #955 of 1000] (13,-23):3. (14,-24):2. (15,-24):1. [test #956 of 1000] [test #957 of 1000] (24,4):7. (23,4):6. [test #958 of 1000] (25,4):4. (24,5):4. (23,5):6. [test #959 of 1000] [test #960 of 1000] (23,6):2. [test #961 of 1000] (27,-1):5. (27,-2):2. [test #962 of 1000] (28,-1):5. (27,0):4. (28,-2):5. [test #963 of 1000] (29,-1):2. (28,0):5. (29,-2):3. [test #964 of 1000] (28,-3):3. (29,-3):2. [test #965 of 1000] (29,0):3. (28,1):5. (27,1):5. [test #966 of 1000] (29,1):4. (28,2):3. (27,2):5. [test #967 of 1000] [test #968 of 1000] (27,3):3. (26,3):3. [test #969 of 1000] [test #970 of 1000] [

In [110]:
# Aggregate the venues_dict collected by the previous cell; the helper
# (defined elsewhere in this notebook) returns two values that are unpacked
# into venues_agg_df and venues_types.
venues_agg_df, venues_types = aggregate_venues_dict(venues_dict)
# Bare last expression: display venues_types. The recorded output is a set
# repr of venue category names ('ATM', 'Accessories Store', ...).
# NOTE(review): this dumps the whole collection; a size or a short sorted
# slice would keep the notebook easier to skim.
venues_types

{'ATM',
 'Accessories Store',
 'Afghan Restaurant',
 'African Restaurant',
 'Airport',
 'Airport Food Court',
 'Airport Gate',
 'Airport Service',
 'Airport Terminal',
 'American Restaurant',
 'Antique Shop',
 'Arcade',
 'Argentinian Restaurant',
 'Art Gallery',
 'Art Museum',
 'Arts & Crafts Store',
 'Asian Restaurant',
 'Athletics & Sports',
 'Auto Garage',
 'Auto Workshop',
 'Automotive Shop',
 'BBQ Joint',
 'Baby Store',
 'Bagel Shop',
 'Bakery',
 'Bank',
 'Bar',
 'Baseball Field',
 'Baseball Stadium',
 'Basketball Court',
 'Bath House',
 'Beach',
 'Bed & Breakfast',
 'Beer Garden',
 'Big Box Store',
 'Bike Trail',
 'Bistro',
 'Board Shop',
 'Bookstore',
 'Boutique',
 'Bowling Alley',
 'Brazilian Restaurant',
 'Breakfast Spot',
 'Brewery',
 'Bridal Shop',
 'Bubble Tea Shop',
 'Buffet',
 'Building',
 'Burger Joint',
 'Burrito Place',
 'Bus Station',
 'Business Service',
 'Butcher',
 'Cafeteria',
 'Café',
 'Cajun / Creole Restaurant',
 'Camera Store',
 'Campground',
 'Candy Store',
 

Out of curiosity, let's find out where we are:

In [117]:
# Call make_map_from_dict (defined elsewhere in this notebook) with the origin
# coordinates, venues_dict and the city name/state from the earlier cells.
#   - 12: positional argument; presumably the initial map zoom level --
#     TODO confirm against the function definition.
#   - radius_zoom=0.2: presumably scales the drawn markers -- TODO confirm.
# NOTE(review): this cell's recorded execution count (117) jumps from the
# previous cell's 110, so cells were run out of order or removed; verify the
# notebook with Restart Kernel -> Run All.
make_map_from_dict(lat_orig, long_orig, venues_dict, 12, city_name, city_state, radius_zoom=0.2)