# Coursera Capstone
## The Battle of the Neighborhoods
This document will be used for the Coursera Capstone project, part of the IBM Data Science Professional Certificate.

In [55]:
import pandas as pd
import numpy as np

import json # library to handle JSON files

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

import requests # library to handle requests

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



### Create a dataframe of neighborhoods in Minneapolis
As far as I could tell, the data for Minneapolis neighborhoods did not come attached with latitude and longitude values. The following section creates a dataframe with neighborhood names and attached lat/long values.

In [4]:
# Data retrieved from
# https://opendata.minneapolismn.gov/datasets/minneapolis-neighborhoods
# Data downloaded and placed in local directory

with open('Minneapolis_Neighborhoods.geojson.json') as jsondata:
    mpls_data = json.load(jsondata)

Define `mpls_nbhds` to be the features of the geojson file imported above.

In [6]:
mpls_nbhds = mpls_data['features']

In [None]:
# Look at the first entry. Note the neighborhood name has key 'BDNAME'.
mpls_nbhds[0]

In [39]:
# Initalize the dataframe
column_names = ['Neighborhood','Latitude', 'Longitude']
nbhds = pd.DataFrame(columns=column_names)
nhbds

In [41]:
# Fill in the neighborhood names. The latitude and longitude are filled in the next step.
for data in mpls_nbhds:
    nbhd_name = data['properties']['BDNAME']
    nbhds = nbhds.append({'Neighborhood':nbhd_name}, ignore_index=True)

In [51]:
nbhds.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Phillips West,1.0,
1,Downtown West,,
2,Downtown East,,
3,Ventura Village,,
4,Sumner - Glenwood,,


#### Populate the latitude and longitude using Geopy

In [90]:
counter = 0
missing_addresses = []
for nbhd in nbhds['Neighborhood']:
    address = nbhd + ', Minneapolis, MN'
    geolocator = Nominatim(user_agent="mpls_explorer")
    location = geolocator.geocode(address)
    if location is None:
        print(address+' is not found on geopy.')
        missing_addresses = missing_addresses +[nbhd]
    else:
        latitude = location.latitude
        longitude = location.longitude
        nbhds['Latitude'][counter] = latitude
        nbhds['Longitude'][counter] = longitude
    counter+=1
print(missing_addresses)
nbhds.head()

Downtown West, Minneapolis, MN is not found on geopy.
Ventura Village, Minneapolis, MN is not found on geopy.
Humboldt Industrial Area, Minneapolis, MN is not found on geopy.
South Uptown, Minneapolis, MN is not found on geopy.
Mid - City Industrial, Minneapolis, MN is not found on geopy.
Nicollet Island - East Bank, Minneapolis, MN is not found on geopy.
['Downtown West', 'Ventura Village', 'Humboldt Industrial Area', 'South Uptown', 'Mid - City Industrial', 'Nicollet Island - East Bank']


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Phillips West,44.9539,-93.2663
1,Downtown West,,
2,Downtown East,44.975,-93.2599
3,Ventura Village,,
4,Sumner - Glenwood,44.9837,-93.2914


There are 6 neighborhoods that Geopy could not identify. We can manually input these by pulling the location data from a google search.

In [92]:
Downtown_West =[44.9742, -93.2733]
Ventura_Village = [44.9618, -93.2582]
Humboldt_Industrial_Area = [45.0421, -93.3077]
South_Uptown = [44.9411, -93.2911]
Mid_City_Industrial = [44.9989, -93.2178]
Nicollet_Island = [44.9879, -93.2629]

missing_latlon = [Downtown_West, Ventura_Village, Humboldt_Industrial_Area, South_Uptown, Mid_City_Industrial, Nicollet_Island]

In [96]:
missing_dict={}
for j in range(0,6):
    entry ={missing_addresses[j]:missing_latlon[j]}
    missing_dict.update(entry)
missing_dict

{'Downtown West': [44.9742, -93.2733],
 'Ventura Village': [44.9618, -93.2582],
 'Humboldt Industrial Area': [45.0421, -93.3077],
 'South Uptown': [44.9411, -93.2911],
 'Mid - City Industrial': [44.9989, -93.2178],
 'Nicollet Island - East Bank': [44.9879, -93.2629]}

In [112]:
# Fill in missing lat/long values into our dataframe
for j in range(0,6):
    value=nbhds.index[nbhds['Neighborhood']==missing_addresses[j]]
    nbhds.loc[value[0], 'Latitude'] = missing_dict[missing_addresses[j]][0]
    nbhds.loc[value[0], 'Longitude'] = missing_dict[missing_addresses[j]][1]

nbhds.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Phillips West,44.9539,-93.2663
1,Downtown West,44.9742,-93.2733
2,Downtown East,44.975,-93.2599
3,Ventura Village,44.9618,-93.2582
4,Sumner - Glenwood,44.9837,-93.2914


In [114]:
nbhds.shape

(87, 3)

According to Wikipedia, there are 81 official neighborhoods. Here we tally 87. Some cross-referencing perhaps is in order.

### Use FourSquare API to gather data about breweries in Minneapolis

In [207]:


search_query ='Brewing'

LIMIT = 200
#radius = 25000
url = 'https://api.foursquare.com/v2/venues/\
search?client_id={}&client_secret={}&v={}\
&ll={},{}&query={}&limit={}'\
.format(CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        mpls_latitude,
        mpls_longitude,
        search_query,
        LIMIT)
url

results = requests.get(url).json()
venues = results['response']['venues']
# tranform venues into a dataframe
df_brewing = json_normalize(venues)

#### Second query

search_query ='Brewery'
category = '50327c8591d4c4b30a586d5d' #Foursquare category for brewery

url = 'https://api.foursquare.com/v2/venues/\
explore?client_id={}&client_secret={}&v={}\
&ll={},{}&categoryId={}&limit={}'\
.format(CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        mpls_latitude,
        mpls_longitude,
        category,
        LIMIT)
url

results = requests.get(url).json()
venues = results['response']['venues']
# tranform venues into a dataframe
#df_brewery = json_normalize(venues)
venues




KeyError: 'venues'

In [None]:
search_query ='Brewing'

LIMIT = 200
#radius = 25000
url = 'https://api.foursquare.com/v2/venues/\
search?client_id={}&client_secret={}&v={}\
&ll={},{}&query={}&limit={}'\
.format(CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        mpls_latitude,
        mpls_longitude,
        search_query,
        LIMIT)
url

results = requests.get(url).json()
#results
venues = results['response']['venues']
# tranform venues into a dataframe
#df_brewery = json_normalize(venues)
results

In [230]:
# Define FourSquare credentials
CLIENT_ID = 'JSN5MX1DKF5XI3CXVZADJMU5LZE5FMLT2COF00LRJDFMFWIK' # your Foursquare ID
CLIENT_SECRET = 'ZST1WYPJCG2J2LGQGUER23BPAC1OMF1BKYC4WQKSSRD3WC1T' # your Foursquare Secret
VERSION = '20200101' # Foursquare API version

# Find Minneapolis latitude and longitude
address = 'Minneapolis, MN'
geolocator = Nominatim(user_agent="mpls_explorer")
location = geolocator.geocode(address)
mpls_latitude = location.latitude # neighborhood latitude value
mpls_longitude = location.longitude # neighborhood longitude value

category = '50327c8591d4c4b30a586d5d' #Foursquare category for brewery

radius = 25000
url = 'https://api.foursquare.com/v2/venues/\
explore?client_id={}&client_secret={}&v={}\
&ll={},{}&categoryId={}&radius={}&limit={}'\
.format(CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        mpls_latitude,
        mpls_longitude,
        category,
        radius,
        LIMIT)
url

results = requests.get(url).json()
# The [0] below threw me off. This article helped.
# https://medium.com/@aboutiana/a-brief-guide-to-using-foursquare-api-with-a-hands-on-example-on-python-6fc4d5451203
venues = results['response']['groups'][0]['items']
# tranform venues into a dataframe
#venues
df_brewery = pd.json_normalize(venues)
df_brewery.head()

Unnamed: 0,referralId,reasons.count,reasons.items,venue.id,venue.name,venue.location.address,venue.location.crossStreet,venue.location.lat,venue.location.lng,venue.location.labeledLatLngs,...,venue.photos.count,venue.photos.groups,venue.location.neighborhood,venue.venuePage.id,venue.delivery.id,venue.delivery.url,venue.delivery.provider.name,venue.delivery.provider.icon.prefix,venue.delivery.provider.icon.sizes,venue.delivery.provider.icon.name
0,e-0-4c8d128bc37a6dcb86d0fc7a-0,0,"[{'summary': 'This spot is popular', 'type': '...",4c8d128bc37a6dcb86d0fc7a,Fulton Brewing Company,414 6th Ave N,at 5th St. N,44.984862,-93.278828,"[{'label': 'display', 'lat': 44.98486196804921...",...,0,[],,,,,,,,
1,e-0-5047b636e4b04db60102f96d-1,0,"[{'summary': 'This spot is popular', 'type': '...",5047b636e4b04db60102f96d,Dangerous Man Brewing Co,1300 2nd St NE,,45.001049,-93.266337,"[{'label': 'display', 'lat': 45.00104863763394...",...,0,[],,,,,,,,
2,e-0-52190bad11d28f3e1ce73946-2,0,"[{'summary': 'This spot is popular', 'type': '...",52190bad11d28f3e1ce73946,Surly Brewing Company,520 Malcolm Ave SE,SE 5th St,44.973226,-93.210072,"[{'label': 'display', 'lat': 44.97322598772595...",...,0,[],"Prospect Park, Minneapolis, MN",,,,,,,
3,e-0-56242508498e6aeb80142c2c-3,0,"[{'summary': 'This spot is popular', 'type': '...",56242508498e6aeb80142c2c,Lakes & Legends Brewing Company,1368 Lasalle Ave,,44.968908,-93.279479,"[{'label': 'display', 'lat': 44.96890779142156...",...,0,[],,465641085.0,,,,,,
4,e-0-5aa9baebff03062a4b1dccba-4,0,"[{'summary': 'This spot is popular', 'type': '...",5aa9baebff03062a4b1dccba,Finnegans House,817 5th Ave S,btwn S 8th & 9th St,44.972301,-93.26641,"[{'label': 'display', 'lat': 44.97230088563604...",...,0,[],,,,,,,,


In [231]:
df_brewery.shape

(100, 28)

In [234]:
df_brewery['venue.name'][0:50]

0                          Fulton Brewing Company
1                        Dangerous Man Brewing Co
2                           Surly Brewing Company
3                 Lakes & Legends Brewing Company
4                                 Finnegans House
5                                  Inbound BrewCo
6                                Sisyphus Brewing
7                          Indeed Brewing Company
8                   Minneapolis Town Hall Brewery
9                       Day Block Brewing Company
10                            Utepils Brewing Co.
11                  Urban Growler Brewing Company
12                              Modist Brewing Co
13                          Pryes Brewing Company
14                                   Venn Brewing
15                 Fair State Brewing Cooperative
16                              Bauhaus Brew Labs
17                                Insight Brewing
18                              Headflyer Brewing
19                           Sociable Cider Werks


In [223]:
df_brewery['items']

0    [{'reasons': {'count': 0, 'items': [{'summary'...
Name: items, dtype: object

In [199]:
print(df_brewing['name'])
print(df_brewery['name'])

0             Day Block Brewing Company
1                    Clockwerks Brewing
2                      Sisyphus Brewing
3                 Surly Brewing Company
4                Indeed Brewing Company
5                Fulton Brewing Company
6         Urban Growler Brewing Company
7            August Schell's Brewing Co
8       Lakes & Legends Brewing Company
9                     Modist Brewing Co
10                   BlackStack Brewing
11                Pryes Brewing Company
12                      Insight Brewing
13    Lakes and Legends Brewing Company
14                     Surly Brewing Co
15          Gastrotruck @Fulton Brewing
16                          LTD Brewing
17             Brass Foundry Brewing Co
18                    Headflyer Brewing
19             Dangerous Man Brewing Co
20         St. Paul Brewing Cooperative
21                  Utepils Brewing Co.
22                       Clutch Brewing
23                Page James Brewing Co
24             Lyn Lake Brewing Rooftop


In [195]:
df['name']

0             Day Block Brewing Company
1                    Clockwerks Brewing
2                      Sisyphus Brewing
3                 Surly Brewing Company
4                Indeed Brewing Company
5                Fulton Brewing Company
6         Urban Growler Brewing Company
7            August Schell's Brewing Co
8       Lakes & Legends Brewing Company
9                     Modist Brewing Co
10                   BlackStack Brewing
11                Pryes Brewing Company
12                      Insight Brewing
13    Lakes and Legends Brewing Company
14                     Surly Brewing Co
15          Gastrotruck @Fulton Brewing
16                          LTD Brewing
17             Brass Foundry Brewing Co
18                    Headflyer Brewing
19             Dangerous Man Brewing Co
20         St. Paul Brewing Cooperative
21                  Utepils Brewing Co.
22                       Clutch Brewing
23                Page James Brewing Co
24             Lyn Lake Brewing Rooftop


### Clean and format brewery data

By looking at the columns and head, we see some candidates for removal. For example, the second listing is the twincities brewery tours, which is not actually a brewery. Additionally, its `location.address` is NaN. This row is a candidate for removal. Let's see if we can further whittle this data set down.

#### Investigation of `NaN` location addresses

In [184]:
df['location.lat']

0     44.973320
1     44.978428
2     44.973312
3     44.949214
4     44.948919
5     44.922766
6     45.010180
7     45.012886
8     44.985346
9     44.984942
10    45.090694
11    44.970515
12    44.929427
13    44.884684
14    44.948491
15    44.999238
16    44.999260
17    44.999921
18    44.960702
19    44.972301
20    44.836448
21    44.997124
22    44.960776
23    44.984862
24    44.962073
25    44.948899
26    44.946533
27    44.970105
28    44.949752
29    44.967841
30    45.014661
31    44.979879
32    44.943512
33    45.031451
34    44.973886
35    44.918505
36    45.000011
37    44.913052
38    44.979734
39    44.999973
40    44.903912
41    44.927864
42    45.031060
43    44.898022
44    44.938302
45    44.903788
46    45.003368
47    45.058820
48    44.898927
49    44.865702
Name: location.lat, dtype: float64

In [183]:
df.iloc[48]

id                                                        54cd6429498e5239f97e4693
name                                           Gastrotruck @ Dangerous Man Brewery
categories                       [{'id': '4bf58dd8d48988d1cb941735', 'name': 'F...
referralId                                                            v-1594759764
hasPerk                                                                      False
location.address                                                               NaN
location.crossStreet                                                           NaN
location.lat                                                               44.8989
location.lng                                                              -93.3299
location.labeledLatLngs          [{'label': 'display', 'lat': 44.89892678912602...
location.distance                                                            10092
location.postalCode                                                            NaN
loca

### Generating a Choropleth Map

In [174]:
m = folium.Map(location=[mpls_latitude, mpls_longitude], zoom_start=13)

In [None]:
folium.GeoJson(mpls_data).add_to(m)
m