# Revised Notebook

In [93]:
import pandas as pd
import geopandas as gpd
import numpy as np
import seaborn as sns
from shapely.geometry import Point, Polygon

import shapely.speedups #For point in polygon calculations
shapely.speedups.enable()

import json # library to handle JSON files

# Matplotlib and associated plotting modules
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# import K Nearest Neightbors
from sklearn.neighbors import KNeighborsClassifier

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
from folium import plugins

import requests # library to handle requests

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe#

## Geographic Data 
### Minneapolis Data

In [94]:
# Data retrieved from
# https://opendata.minneapolismn.gov/datasets/minneapolis-neighborhoods
# Data downloaded and placed in local directory.

with open('Minneapolis_Neighborhoods.geojson.json') as jsondata:
    mpls_data = json.load(jsondata)
    
#with open('Communities.geojson') as jsondata:
 #   mpls_comm_data = json.load(jsondata)
    
with open('stp_neighborhoods.geojson') as jsondata:
    stp_nbhd_data = json.load(jsondata)
    
# We will refer back to the larger mpls_data dataframe later.
mpls_nbhds = mpls_data['features']
#mpls_comms = mpls_comm_data['features']
stp_nbhds = stp_nbhd_data['features']

# Initalize the dataframe
column_names = ['Neighborhood','Latitude', 'Longitude']
df_nbhds_mpls = pd.DataFrame(columns=column_names)

# Fill in the neighborhood names. The latitude and longitude are filled in the next step.
for data in mpls_nbhds:
    nbhd_name = data['properties']['BDNAME']
    df_nbhds_mpls = df_nbhds_mpls.append({'Neighborhood':nbhd_name}, ignore_index=True)

# Populate lat/lon values for neighborhoods. Gather values not found by Geopy.
missing_latlons = []
for nbhd in df_nbhds_mpls['Neighborhood']:
    address = nbhd + ', Minneapolis, MN'
    geolocator = Nominatim(user_agent="mn_explorer")
    location = geolocator.geocode(address, timeout=10)
    if location is None:
        print(address+' is not found on geopy.')
        missing_latlons = missing_latlons +[nbhd]
    else:
        df_nbhds_mpls.loc[df_nbhds_mpls.index[df_nbhds_mpls['Neighborhood'] == nbhd].to_list(),'Latitude']= location.latitude
        df_nbhds_mpls.loc[df_nbhds_mpls.index[df_nbhds_mpls['Neighborhood'] == nbhd].to_list(),'Longitude']= location.longitude
    
print(missing_latlons)
#df_nbhds_mpls.head()


Downtown West, Minneapolis, MN is not found on geopy.
Ventura Village, Minneapolis, MN is not found on geopy.
South Uptown, Minneapolis, MN is not found on geopy.
Mid - City Industrial, Minneapolis, MN is not found on geopy.
Nicollet Island - East Bank, Minneapolis, MN is not found on geopy.
['Downtown West', 'Ventura Village', 'South Uptown', 'Mid - City Industrial', 'Nicollet Island - East Bank']


In [95]:
# Manually look up missing lat/lon values for these neighborhoods.
Downtown_West =[44.9742, -93.2733]
Ventura_Village = [44.9618, -93.2582]
# This neighborhood is now found on geopy...
#Humboldt_Industrial_Area = [45.0421, -93.3077]
South_Uptown = [44.9411, -93.2911]
Mid_City_Industrial = [44.9989, -93.2178]
Nicollet_Island = [44.9879, -93.2629]

missing_mpls = [Downtown_West, Ventura_Village, South_Uptown, Mid_City_Industrial, Nicollet_Island]

# Populate dictionary of neighborhoods and missing lat/long values
missing_dict={}
for j in range(0,len(missing_mpls)):
    entry ={missing_latlons[j]:missing_mpls[j]}
    missing_dict.update(entry)


# Fill in missing lat/long values into our dataframe
for j in range(0,len(missing_mpls)):
    value=df_nbhds_mpls.index[df_nbhds_mpls['Neighborhood']==missing_latlons[j]]
    df_nbhds_mpls.loc[value[0], 'Latitude'] = missing_dict[missing_latlons[j]][0]
    df_nbhds_mpls.loc[value[0], 'Longitude'] = missing_dict[missing_latlons[j]][1]




### Repeat for St. Paul

In [96]:
# Initalize the dataframe
column_names = ['Neighborhood','Latitude', 'Longitude']
df_nbhds_stp = pd.DataFrame(columns=column_names)


# Fill in the neighborhood names. The latitude and longitude are filled in the next step.
for data in stp_nbhds:
    nbhd_name = data['properties']['name2']
    df_nbhds_stp = df_nbhds_stp.append({'Neighborhood':nbhd_name}, ignore_index=True)
    
# Populate Lat/Long using Geopy
missing_latlons = []
for nbhd in df_nbhds_stp['Neighborhood']:
    address = nbhd + ', St. Paul, MN'
    geolocator = Nominatim(user_agent="mn_explorer")
    location = geolocator.geocode(address, timeout=10)
    if location is None:
        print(address+' is not found on geopy.')
        missing_latlons = missing_latlons +[nbhd]
    else:
        df_nbhds_stp.loc[df_nbhds_stp.index[df_nbhds_stp['Neighborhood'] == nbhd].to_list(),'Latitude']= location.latitude
        df_nbhds_stp.loc[df_nbhds_stp.index[df_nbhds_stp['Neighborhood'] == nbhd].to_list(),'Longitude']= location.longitude

print(missing_latlons)

Macalester-Groveland, St. Paul, MN is not found on geopy.
West Side Community Organization, St. Paul, MN is not found on geopy.
Summit Hill Association, St. Paul, MN is not found on geopy.
Summit-University, St. Paul, MN is not found on geopy.
Thomas-Dale/Frogtown, St. Paul, MN is not found on geopy.
West 7th Federation/Fort Road, St. Paul, MN is not found on geopy.
CapitolRiver Council, St. Paul, MN is not found on geopy.
Payne-Phalen, St. Paul, MN is not found on geopy.
The Greater East Side, St. Paul, MN is not found on geopy.
Eastview-Conway-Battle Creek-Highwood Hills, St. Paul, MN is not found on geopy.
Union Park, St. Paul, MN is not found on geopy.
['Macalester-Groveland', 'West Side Community Organization', 'Summit Hill Association', 'Summit-University', 'Thomas-Dale/Frogtown', 'West 7th Federation/Fort Road', 'CapitolRiver Council', 'Payne-Phalen', 'The Greater East Side', 'Eastview-Conway-Battle Creek-Highwood Hills', 'Union Park']


Unnamed: 0,Neighborhood,Latitude,Longitude
0,St Anthony Park,44.9792,-93.1935
1,Como,44.9759,-93.1313
2,North End,53.7432,-2.39092
3,Hamline-Midway,44.9629,-93.1669
4,Macalester-Groveland,44.9343,-93.167


### Concat the neighborhoods

In [98]:
nbhds = pd.concat([df_nbhds_mpls,df_nbhds_stp]).reset_index(drop=True)

## Gather brewery and restaurant data
### Use Foursquare API to gather brewery data

In [99]:
# Will drop these columns from the search results.
drop_columns = ['referralId', 'reasons.count', 'reasons.items', 'venue.location.labeledLatLngs',
                'venue.photos.count', 'venue.photos.groups', 'venue.location.postalCode',
                'venue.location.cc', 'venue.location.city', 'venue.location.state', 'venue.location.country',
                'venue.location.formattedAddress', 'venue.venuePage.id', 'venue.delivery.id', 'venue.delivery.url',
                'venue.delivery.provider.name', 'venue.delivery.provider.icon.prefix', 'venue.delivery.provider.icon.sizes',
                'venue.delivery.provider.icon.name', 'venue.location.neighborhood', 'venue.categories',
                'venue.location.distance', 'venue.location.crossStreet']


# Define FourSquare credentials
CLIENT_ID = 'JSN5MX1DKF5XI3CXVZADJMU5LZE5FMLT2COF00LRJDFMFWIK' # your Foursquare ID
CLIENT_SECRET = 'ZST1WYPJCG2J2LGQGUER23BPAC1OMF1BKYC4WQKSSRD3WC1T' # your Foursquare Secret
VERSION = '20200601' # Foursquare API version

category = '50327c8591d4c4b30a586d5d' # Foursquare category for brewery
radius = 3500 # in meters
LIMIT = 200

# Initialize df_brew
df_brew =pd.DataFrame(columns=['venue.id', 'venue.name', 'venue.location.address',
       'venue.location.lat', 'venue.location.lng'])

# Look at a 5 km radius area centered at each of the neighbhorhood lat/lon coordinates.
# Do FourSquare API call for each neighborhood, locating breweries. Store results in df_brew.
for j in range(0,len(nbhds)):
    lat=nbhds.loc[j,'Latitude']
    lon=nbhds.loc[j,'Longitude']
    url='https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'\
        .format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lon, category, radius, LIMIT)
    results=requests.get(url).json()
    venues = results['response']['groups'][0]['items']
    df_temp = pd.json_normalize(venues)
    df_brew = pd.concat([df_brew, df_temp])

# Drop unwanted columns, eliminate duplicates, reset index to arrive at final df_brew dataframe.    
df_brew.drop(columns=drop_columns, inplace=True)
df_brew.drop_duplicates(inplace=True)
df_brew.reset_index(inplace=True)
df_brew.drop(columns='index', inplace=True)
print(df_brew.shape)
df_brew.head()

(99, 5)


Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng
0,538bf392498e382b6fa108b4,Eastlake Craft Brewery,920 E Lake St #123,44.948491,-93.260416
1,56242508498e6aeb80142c2c,Lakes & Legends Brewing Company,1368 Lasalle Ave,44.968908,-93.279479
2,52e6ad8c498efd5a184d1105,LynLake Brewery,2934 Lyndale Ave S,44.948919,-93.288345
3,5aa9baebff03062a4b1dccba,Finnegans House,817 5th Ave S,44.972301,-93.26641
4,52d4225b498e07070c180a62,Sisyphus Brewing,712 Ontario Ave W,44.973214,-93.28904


In [100]:
# Drop the following venues from df_brew.
venue_brew_drop = {'Metro Warehouse Liquor', 'Pizza Lucé', 'SiP Coffee Bar', 'Spyhouse Coffee'}
for j in venue_brew_drop:
    df_brew.drop(df_brew.index[df_brew['venue.name']==j], inplace=True)


### Use Foursquare API to gather restaurant data

In [101]:
# Will drop these columns from the search results.
drop_columns = ['referralId', 'reasons.count', 'reasons.items', 'venue.location.labeledLatLngs',
                'venue.photos.count', 'venue.photos.groups', 'venue.location.postalCode',
                'venue.location.cc', 'venue.location.city', 'venue.location.state', 'venue.location.country',
                'venue.location.formattedAddress', 'venue.venuePage.id', 'venue.delivery.id', 'venue.delivery.url',
                'venue.delivery.provider.name', 'venue.delivery.provider.icon.prefix', 'venue.delivery.provider.icon.sizes',
                'venue.delivery.provider.icon.name', 'venue.location.neighborhood', 'venue.categories',
                'venue.location.distance', 'venue.location.crossStreet']

category = '4d4b7105d754a06374d81259' # Foursquare category for food (restaurants)
radius = 2000 # in meters
LIMIT = 200

# Initialize df_food
df_food =pd.DataFrame(columns=['venue.id', 'venue.name', 'venue.location.address',
       'venue.location.lat', 'venue.location.lng'])

# Look at a 2 km radius area centered at each of the Minneapolis neighbhorhoods.
# Do FourSquare API call for each neighborhood, locating restaurants. Store results in df_food.
counter = 0
for j in range(0,len(df_nbhds_mpls)):
    lat=df_nbhds_mpls.loc[j,'Latitude']
    lon=df_nbhds_mpls.loc[j,'Longitude']
    url='https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'\
        .format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lon, category, radius, LIMIT)
    results=requests.get(url).json()
    venues = results['response']['groups'][0]['items']
    df_temp = pd.json_normalize(venues)
#    print('df_temp shape is', df_temp.shape, '. Counter is ', counter)
    df_food = pd.concat([df_food, df_temp])
    #print(counter)
    #counter+=1
    
# Repeat above call but with St. Paul neighborhoods. Increase the radius size.
radius = 3500
counter = 0
for j in range(0,len(df_nbhds_stp)):
    lat=df_nbhds_stp.loc[j,'Latitude']
    lon=df_nbhds_stp.loc[j,'Longitude']
    url='https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'\
        .format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lon, category, radius, LIMIT)
    results=requests.get(url).json()
    venues = results['response']['groups'][0]['items']
    df_temp = pd.json_normalize(venues)
    #print('df_temp shape is', df_temp.shape, '. Counter is ', counter)
    df_food = pd.concat([df_food, df_temp])
    #print(counter)
    counter+=1

# Drop unwanted columns, eliminate duplicates, reset index to arrive at final df_food dataframe.    
df_food.drop(columns=drop_columns, inplace=True)
df_food.drop_duplicates(inplace=True)
df_food.reset_index(drop=True, inplace=True)
print(df_food.shape)
df_food.head()

(1570, 5)


Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng
0,44b492d9f964a52057351fe3,Midtown Global Market,920 E Lake St,44.948589,-93.260502
1,49fb95f5f964a5205d6e1fe3,Taqueria La Hacienda,334 E Lake St #101,44.948605,-93.271181
2,44f1f813f964a5200a381fe3,Quang Restaurant,2719 Nicollet Ave,44.953136,-93.277745
3,50255babe4b0bc802b45360b,FIKA,2600 Park Ave,44.954555,-93.265717
4,49e4c13ef964a52034631fe3,Manny's Tortas,920 E Lake St,44.948679,-93.260649


In [102]:
df_brew.head()

Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng
0,538bf392498e382b6fa108b4,Eastlake Craft Brewery,920 E Lake St #123,44.948491,-93.260416
1,56242508498e6aeb80142c2c,Lakes & Legends Brewing Company,1368 Lasalle Ave,44.968908,-93.279479
2,52e6ad8c498efd5a184d1105,LynLake Brewery,2934 Lyndale Ave S,44.948919,-93.288345
3,5aa9baebff03062a4b1dccba,Finnegans House,817 5th Ave S,44.972301,-93.26641
4,52d4225b498e07070c180a62,Sisyphus Brewing,712 Ontario Ave W,44.973214,-93.28904


### Concatenate the brewery and restaurant dataframes

In [103]:
# type variable set to 0 for restaurants
df_food['type'] = pd.Series(np.zeros(len(df_food)))

# type variable set to 1 for breweries
df_brew['type'] = pd.Series(np.ones(len(df_brew)))

In [104]:
df = pd.concat([df_food, df_brew]).reset_index(drop=True)
print(df.shape)
df.head()

(1665, 6)


Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,type
0,44b492d9f964a52057351fe3,Midtown Global Market,920 E Lake St,44.948589,-93.260502,0.0
1,49fb95f5f964a5205d6e1fe3,Taqueria La Hacienda,334 E Lake St #101,44.948605,-93.271181,0.0
2,44f1f813f964a5200a381fe3,Quang Restaurant,2719 Nicollet Ave,44.953136,-93.277745,0.0
3,50255babe4b0bc802b45360b,FIKA,2600 Park Ave,44.954555,-93.265717,0.0
4,49e4c13ef964a52034631fe3,Manny's Tortas,920 E Lake St,44.948679,-93.260649,0.0


## Assign neighborhoods to breweries and restaurants
### Construct geodataframes of neighborhoods

In [191]:
# Make Minneapolis neighborhoods
mpls_nbhd_geo = gpd.read_file('Minneapolis_Neighborhoods.geojson.json', crs={'init':'epsg:4326'})
mpls_nbhd_geo = mpls_nbhd_geo[['geometry', 'BDNAME']]
mpls_nbhd_geo['city']=pd.Series(['Minneapolis']*len(mpls_nbhd_geo))

# Minneapolis communities are closer in size to the St. Paul neighborhoods.
# If we prefer to consider Minneapolis communities and St. Paul neighborhoods, comment the above code and
# run the following instead.

#mpls_comm_geo = gpd.read_file('Communities.geojson')
#mpls_comm_geo = mpls_comm_geo[['geometry', 'CommName']]
#mpls_comm_geo['city']=pd.Series(['Minneapolis']*len(mpls_comm_geo))
#mpls_comm_geo.rename(columns={'CommName':'BDNAME'}, inplace=True) # Rename so columns match

# Make St. Paul neighborhoods
stp_geo = gpd.read_file('stp_neighborhoods.geojson', crs={'init':'epsg:4326'})
stp_geo = stp_geo[['geometry', 'name2']]
stp_geo.rename(columns={'name2':'BDNAME'}, inplace=True) # Rename so columns match
stp_geo['city']=pd.Series(['St. Paul']*len(stp_geo))

# Combine to twin cities neighborhoods
tc_geo=pd.concat([mpls_nbhd_geo, stp_geo]).drop_duplicates().reset_index(drop=True)
tc_geo.to_crs(epsg=3310,inplace=True) # Use projected coordinate system to compute distances.
print(tc_geo.shape)
tc_geo.head()

(104, 3)


Unnamed: 0,geometry,BDNAME,city
0,"MULTIPOLYGON (((2099123.466 1067774.439, 20991...",Phillips West,Minneapolis
1,"MULTIPOLYGON (((2098635.147 1070167.232, 20986...",Downtown West,Minneapolis
2,"MULTIPOLYGON (((2099914.727 1070072.087, 20999...",Downtown East,Minneapolis
3,"MULTIPOLYGON (((2099951.798 1068632.807, 20999...",Ventura Village,Minneapolis
4,"MULTIPOLYGON (((2096296.348 1070182.925, 20962...",Sumner - Glenwood,Minneapolis


In [189]:
# Turn df_brew into geodataframe
df_brew_geo = gpd.GeoDataFrame(df_brew, 
                               geometry=gpd.points_from_xy(df_brew['venue.location.lng'],
                                                           df_brew['venue.location.lat']),
                              crs={'init':'epsg:4326'}) # Projects lat/long coords so that distance can be computed
df_brew_geo.to_crs(epsg=3310,inplace=True)
#df_brew_geo.reset_index(inplace=True)

# Turn df_food into geodataframe
df_food_geo = gpd.GeoDataFrame(df_food, 
                               geometry=gpd.points_from_xy(df_food['venue.location.lng'], 
                                                           df_food['venue.location.lat']),
                              crs={'init':'epsg:4326'})
df_food_geo.to_crs(epsg=3310,inplace=True)

df_food_geo['neighborhood']=pd.Series(np.zeros(len(df_food)))
df_brew_geo['neighborhood']=pd.Series(np.zeros(len(df_brew)))

brewery_count=[]
food_count=[]
for j in range(0,len(tc_geo)):
    nbhd = tc_geo.loc[j,'BDNAME']  # nbhd is jth neighborhood name, string
    nbhd_geo = tc_geo[tc_geo['BDNAME'] == nbhd]  # nbhd_geo is jth row
    nbhd_geo.reset_index(drop=True, inplace=True)  # reset index of dataframe
    brew_mask = df_brew_geo['geometry'].within(nbhd_geo.loc[0,'geometry'])  # Boolean mask of breweries in nbhd.
    food_mask = df_food_geo['geometry'].within(nbhd_geo.loc[0,'geometry'])  # Boolean mask of restaurants in nbhd.
    
    # Add neighborhood columns to df_food
    df_food_geo['neighborhood'].mask(food_mask, other=nbhd, inplace=True)
    food_count.append(sum(food_mask))  # create list of brewery counts per neighborhood.

    # Add neighborhood column to df_brew.
    df_brew_geo['neighborhood'].mask(brew_mask, other=nbhd, inplace=True)
    brewery_count.append(sum(brew_mask))  # create list of brewery counts per neighborhood.


# Add column to tc_geo    
tc_geo['Restaurant Count'] = pd.Series(food_count) # add brewery count column        
    
# Add column to tc_geo    
tc_geo['Brewery Count'] = pd.Series(brewery_count) # add brewery count column
tc_geo.tail()

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,geometry,BDNAME,city,Restaurant Count,Brewery Count
99,"MULTIPOLYGON (((-93.04556 44.97503, -93.04599 ...",Payne-Phalen,St. Paul,0,0
100,"MULTIPOLYGON (((-93.04556 44.97503, -93.04552 ...",The Greater East Side,St. Paul,0,0
101,"MULTIPOLYGON (((-93.05689 44.94264, -93.06328 ...",Dayton's Bluff,St. Paul,0,0
102,"MULTIPOLYGON (((-93.05027 44.91949, -93.05195 ...",Eastview-Conway-Battle Creek-Highwood Hills,St. Paul,0,0
103,"MULTIPOLYGON (((-93.14661 44.94140, -93.15172 ...",Union Park,St. Paul,0,0


In [187]:
df_brew_geo.head(2)

Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,type,geometry,neighborhood
0,538bf392498e382b6fa108b4,Eastlake Craft Brewery,920 E Lake St #123,44.948491,-93.260416,1.0,POINT (2099669.745 1066507.902),0.0
1,56242508498e6aeb80142c2c,Lakes & Legends Brewing Company,1368 Lasalle Ave,44.968908,-93.279479,1.0,POINT (2097586.909 1068247.256),0.0


## Distance to closest brewery
Here we add a column to the restaurant/brewery data frame indicating distance to closest brewery. By default, breweries will have distance zero to the nearest breweries.

In [204]:
df['nearest_brew']=[0.0]*len(df)

In [205]:
df.head()

Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,type,nearest_brew
0,44b492d9f964a52057351fe3,Midtown Global Market,920 E Lake St,44.948589,-93.260502,0.0,0.0
1,49fb95f5f964a5205d6e1fe3,Taqueria La Hacienda,334 E Lake St #101,44.948605,-93.271181,0.0,0.0
2,44f1f813f964a5200a381fe3,Quang Restaurant,2719 Nicollet Ave,44.953136,-93.277745,0.0,0.0
3,50255babe4b0bc802b45360b,FIKA,2600 Park Ave,44.954555,-93.265717,0.0,0.0
4,49e4c13ef964a52034631fe3,Manny's Tortas,920 E Lake St,44.948679,-93.260649,0.0,0.0


In [200]:
for j in range(len(df_food)):
    df_food_geo['']
(df_food_geo.distance(other=df_brew_geo.loc[0,'geometry'])/1000).sort_values()

8          0.009572
0          0.012748
4          0.027796
922        0.028506
25         0.037020
           ...     
1256    6572.754015
1258    6573.049880
1260    6573.735626
1261    6573.835048
1255    6573.882430
Length: 1570, dtype: float64

In [155]:
df_food_geo.crs
df_food_geo.head(2)

Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,type,geometry,neighborhood
0,44b492d9f964a52057351fe3,Midtown Global Market,920 E Lake St,44.948589,-93.260502,0.0,POINT (-93.26050 44.94859),Midtown Phillips
1,49fb95f5f964a5205d6e1fe3,Taqueria La Hacienda,334 E Lake St #101,44.948605,-93.271181,0.0,POINT (-93.27118 44.94861),Phillips West


In [222]:
### This is it...
df_food_geo['nearest_brew']=np.zeros(len(df_food_geo))

for j in range(len(df_food_geo)):
    df_food_geo['nearest_brew'][j]=(df_brew_geo['geometry'].distance(other=df_food_geo['geometry'][j])/1000).min()
    
df_food_geo.head()    

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,type,geometry,neighborhood,nearest_brew
0,44b492d9f964a52057351fe3,Midtown Global Market,920 E Lake St,44.948589,-93.260502,0.0,POINT (2099660.185 1066516.336),0.0,0.012748
1,49fb95f5f964a5205d6e1fe3,Taqueria La Hacienda,334 E Lake St #101,44.948605,-93.271181,0.0,POINT (2098843.476 1066281.646),0.0,0.856686
2,44f1f813f964a5200a381fe3,Quang Restaurant,2719 Nicollet Ave,44.953136,-93.277745,0.0,POINT (2098202.925 1066616.008),0.0,0.940641
3,50255babe4b0bc802b45360b,FIKA,2600 Park Ave,44.954555,-93.265717,0.0,POINT (2099078.612 1067032.444),0.0,0.790305
4,49e4c13ef964a52034631fe3,Manny's Tortas,920 E Lake St,44.948679,-93.260649,0.0,POINT (2099646.176 1066522.638),0.0,0.027796


In [223]:
df_food_geo.tail()

Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,type,geometry,neighborhood,nearest_brew
1565,43b256f7f964a520a12c1fe3,Green Mill Restaurant & Bar,6025 Hudson Road,44.946858,-92.983987,0.0,POINT (2120838.802 1072487.926),0.0,3.328154
1566,4abe4658f964a5205c8c20e3,Taco John's,1010 Gershwin Ave N,44.963493,-92.981704,0.0,POINT (2120497.677 1074298.411),0.0,3.219574
1567,4a714021f964a52030d91fe3,Denny's,255 Century Ave N,44.950312,-92.985374,0.0,POINT (2120625.892 1072822.232),0.0,3.080543
1568,53473101498e283cf8dc0fdf,Ruchi's South Indian Cuisine,27 Century Ave,44.94241,-92.985321,0.0,POINT (2120874.748 1071987.526),0.0,3.475527
1569,4b1e8b53f964a520671b24e3,Subway,2121 University Ave W,44.960585,-93.188408,0.0,POINT (2104800.812 1069384.660),0.0,0.37037


In [221]:
df_food_geo.head(2)

Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,type,geometry,neighborhood,nearest_brew
0,44b492d9f964a52057351fe3,Midtown Global Market,920 E Lake St,44.948589,-93.260502,0.0,POINT (2099660.185 1066516.336),0.0,0.012748
1,49fb95f5f964a5205d6e1fe3,Taqueria La Hacienda,334 E Lake St #101,44.948605,-93.271181,0.0,POINT (2098843.476 1066281.646),0.0,0.0


In [145]:
import geopandas as gpd
from shapely.geometry import Point
geom=[Point(xy) for xy in zip([117.454361,117.459880],[38.8459879,38.846255])]
gdf=gpd.GeoDataFrame(geometry=geom,crs={'init':'epsg:4326'})
gdf.to_crs(epsg=3310,inplace=True)
l=gdf.distance(gdf.shift())
print(l)

  return _prepare_from_string(" ".join(pjargs))


0           NaN
1    479.450134
dtype: float64


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:3310
Right CRS: None

  


In [170]:
df_food_geo = df_food_geo.set_crs(epsg=4326, inplace=True)
df_food_geo.head()

Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,type,geometry,neighborhood
0,44b492d9f964a52057351fe3,Midtown Global Market,920 E Lake St,44.948589,-93.260502,0.0,POINT (-93.26050 44.94859),Midtown Phillips
1,49fb95f5f964a5205d6e1fe3,Taqueria La Hacienda,334 E Lake St #101,44.948605,-93.271181,0.0,POINT (-93.27118 44.94861),Phillips West
2,44f1f813f964a5200a381fe3,Quang Restaurant,2719 Nicollet Ave,44.953136,-93.277745,0.0,POINT (-93.27774 44.95314),Whittier
3,50255babe4b0bc802b45360b,FIKA,2600 Park Ave,44.954555,-93.265717,0.0,POINT (-93.26572 44.95455),Phillips West
4,49e4c13ef964a52034631fe3,Manny's Tortas,920 E Lake St,44.948679,-93.260649,0.0,POINT (-93.26065 44.94868),Midtown Phillips


In [163]:
df_food_geo.to_crs('EPSG:4326').distance(other=df_brew_geo.loc[0,'geometry'].to_crs('EPSG:4326'))

AttributeError: 'Point' object has no attribute 'to_crs'

In [176]:
# Turn df_brew into geodataframe
df_brew_geo = df_brew_geo.to_crs(epsg=3310)
df_brew_geo.head()

Unnamed: 0,venue.id,venue.name,venue.location.address,venue.location.lat,venue.location.lng,type,geometry,neighborhood
0,538bf392498e382b6fa108b4,Eastlake Craft Brewery,920 E Lake St #123,44.948491,-93.260416,1.0,POINT (2099669.745 1066507.902),0.0
1,56242508498e6aeb80142c2c,Lakes & Legends Brewing Company,1368 Lasalle Ave,44.968908,-93.279479,1.0,POINT (2097586.909 1068247.256),0.0
2,52e6ad8c498efd5a184d1105,LynLake Brewery,2934 Lyndale Ave S,44.948919,-93.288345,1.0,POINT (2097521.966 1065935.018),0.0
3,5aa9baebff03062a4b1dccba,Finnegans House,817 5th Ave S,44.972301,-93.26641,1.0,POINT (2098481.482 1068895.655),0.0
4,52d4225b498e07070c180a62,Sisyphus Brewing,712 Ontario Ave W,44.973214,-93.28904,1.0,POINT (2096724.410 1068491.590),0.0


In [182]:

df_brew_geo.distance(other=df_brew_geo.loc[0,'geometry'])

0         0.000000
1      2713.587431
2      2222.870190
3      2667.082530
4      3551.057643
          ...     
93    14086.465176
94    15096.103089
95    15098.789373
97    19023.663507
98    15643.489450
Length: 95, dtype: float64