<a href="https://colab.research.google.com/github/mbayekebe/Coursera_Capstone/blob/master/NewYorkVenueCat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Producing New York maps with Venues and the tables for Venue categories

In [0]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import os # environment access, used to read API credentials

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.18.1               |             py_0          51 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    conda-4.6.1                |           py36_0         878 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         962 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0        conda-forge

The following packages will be UPDATED:

    conda:         4.5.12-py36_1000 conda-forge --> 4.6.1-py36_0 conda-forge
    geopy:         1.11.0-py36_0    conda-forge --> 1.18.1-py_0  conda-forge


Downloading and Extracting Packages
geopy-1.18.1         | 51 K

In [0]:
!wget -q -O 'newyork_data.json' https://ibm.box.com/shared/static/fbpwbovar7lf8p5sgddm06cgipa2rxpe.json
print('Data downloaded!')

with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)
    
neighborhoods_data = newyork_data['features']

# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)


for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['borough'] 
    neighborhood_name = data['properties']['name']
        
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]
    
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)
    
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)


Data downloaded!
The dataframe has 5 boroughs and 306 neighborhoods.


In [0]:
# Resolve the city name to coordinates; they are used as the centre of every map below.
address = 'New York, USA'

geolocator = Nominatim(user_agent="capstoneProject")
location = geolocator.geocode(address, timeout=60, exactly_one=True)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The decimal coordinates of New York are {}, {}.'.format(latitude, longitude))

The decimal coordinates of New York are 40.7308619, -73.9871558.


In [0]:
# Base map centred on the geocoded city coordinates.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=12)

# Draw one blue circle per neighborhood; clicking it shows the neighborhood name.
for lat, lng, local in zip(*(neighborhoods[col] for col in ('Latitude', 'Longitude', 'Neighborhood'))):
    popup = folium.Popup('{}'.format(local), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7)
    marker.add_to(map_newyork)

# last expression of the cell: renders the map
map_newyork

In [0]:
# function to repeat the exploring process to all the neighborhoods in New York
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=500, categoryIds=''):
    """Search Foursquare for venues around each neighborhood coordinate.

    Reads the notebook-level globals ``CLIENT_ID``, ``CLIENT_SECRET``,
    ``VERSION`` and ``limit``; they must be defined before the first call.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences giving each neighborhood's name and coordinates.
        They are zipped, so iteration stops at the shortest one.
    radius : number, optional
        Value sent as the API's ``radius`` parameter (default 500).
    categoryIds : str, optional
        Value sent as the API's ``categoryId`` parameter. The parameter is
        omitted when this is the empty string (the default).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue that has at least one named category, with
        columns Neighborhood, Neighborhood Latitude, Neighborhood Longitude,
        Venue, Venue Latitude, Venue Longitude, Venue Category. When nothing
        is found the frame is empty but still carries these columns.

    Raises
    ------
    RuntimeError
        If a reply does not contain ``response.venues`` (for example an API
        error payload). The message names the neighborhood and includes the
        reply's ``meta`` section; the request URL is deliberately not echoed
        because it carries the client secret.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    search_url = 'https://api.foursquare.com/v2/venues/search'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests encode the query string instead of formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }
        if categoryIds != '':
            params['categoryId'] = categoryIds

        # make the GET request; the timeout keeps one stalled call from hanging the whole run
        payload = requests.get(search_url, params=params, timeout=30).json()

        try:
            venues = payload['response']['venues']
        except (KeyError, TypeError):
            meta = payload.get('meta', payload) if isinstance(payload, dict) else payload
            raise RuntimeError(
                'Unexpected Foursquare response for neighborhood {!r}: {}'.format(name, meta))

        # keep only the relevant information for each nearby venue
        for v in venues:
            categories = v.get('categories') or []
            if not categories or 'name' not in categories[0]:
                # venues without a named category are skipped, as before
                continue
            rows.append((
                name,
                lat,
                lng,
                v['name'],
                v['location']['lat'],
                v['location']['lng'],
                categories[0]['name'],
            ))

    # Passing columns= keeps the schema stable even when no venue was found.
    return pd.DataFrame(rows, columns=columns)

In [0]:
limit = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# Credentials must never be committed with the notebook. Export
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET in the environment that
# launches the kernel. The key pair that was previously hard-coded here has
# been published and should be revoked in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20181020' # sent as the `v` parameter of every API request

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')

In [0]:
# Use category id 4d4b7104d754a06370d81259 Arts & Entertainment
newyork_venues_arts = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=radius,  # configured search radius instead of a repeated literal
    categoryIds='4d4b7104d754a06370d81259',
)
# Only the last expression of a cell is rendered, so the former mid-cell
# .head() call displayed nothing and has been dropped.
newyork_venues_arts.shape

(4220, 7)

In [0]:
# Save the fetched venues; the row index is written as the first CSV column.
newyork_venues_arts.to_csv('NewYorkVenueArts.csv', index=True)

In [0]:
# function to add markers for given venues to map
def addToMap(df, color, existingMap):
    """Draw one circle marker per venue row of ``df`` onto ``existingMap``.

    ``df`` must provide the columns 'Venue Latitude', 'Venue Longitude',
    'Neighborhood', 'Venue' and 'Venue Category'. ``color`` is used for both
    the outline and the fill of every marker. The map is modified in place and
    nothing is returned.
    """
    fields = ('Venue Latitude', 'Venue Longitude', 'Neighborhood', 'Venue', 'Venue Category')
    for v_lat, v_lng, hood, venue_name, category in zip(*(df[f] for f in fields)):
        # popup text reads "<venue> (<category>) - <neighborhood>"
        popup_text = '{} ({}) - {}'.format(venue_name, category, hood)
        marker = folium.CircleMarker(
            [v_lat, v_lng],
            radius=5,
            popup=folium.Popup(popup_text, parse_html=True),
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7)
        marker.add_to(existingMap)

In [0]:
 # Fresh map centred on the city, with the Arts venues drawn as red markers.
 map_newyork_arts = folium.Map(zoom_start=12, location=[latitude, longitude])
 addToMap(df=newyork_venues_arts, color='red', existingMap=map_newyork_arts)
 map_newyork_arts

In [0]:
# Use category id 4d4b7105d754a06372d81259 College & University
newyork_venues_colu = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=radius,  # configured search radius instead of a repeated literal
    categoryIds='4d4b7105d754a06372d81259',
)
# Only the last expression of a cell is rendered, so the former mid-cell
# .head() call displayed nothing and has been dropped.
newyork_venues_colu.shape


(3062, 7)

In [0]:
# Save the fetched venues; the row index is written as the first CSV column.
newyork_venues_colu.to_csv('NewYorkVenueColU.csv', index=True)

In [0]:
# Use category id 4d4b7105d754a06374d81259 Food
newyork_venues_food = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=radius,  # configured search radius instead of a repeated literal
    categoryIds='4d4b7105d754a06374d81259',
)
# Only the last expression of a cell is rendered, so the former mid-cell
# .head() call displayed nothing and has been dropped.
newyork_venues_food.shape


(10628, 7)

In [0]:
# Save the fetched venues; the row index is written as the first CSV column.
newyork_venues_food.to_csv('NewYorkVenueFood.csv', index=True)

In [0]:
# Use category id 4d4b7105d754a06375d81259 Professional & Other
newyork_venues_prother = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=radius,  # configured search radius instead of a repeated literal
    categoryIds='4d4b7105d754a06375d81259',
)
# Only the last expression of a cell is rendered, so the former mid-cell
# .head() call displayed nothing and has been dropped.
newyork_venues_prother.shape

(11352, 7)

In [0]:
# Save the fetched venues; the row index is written as the first CSV column.
newyork_venues_prother.to_csv('NewYorkVenueProf.csv', index=True)

In [0]:
# Use category id 4d4b7105d754a06376d81259 Nightlife
# (the variable keeps its original "nighlife" spelling because the cell that
# saves this frame to CSV refers to it by that name)
newyork_venues_nighlife = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=radius,  # configured search radius instead of a repeated literal
    categoryIds='4d4b7105d754a06376d81259',
)
# Only the last expression of a cell is rendered, so the former mid-cell
# .head() call displayed nothing and has been dropped.
newyork_venues_nighlife.shape

In [0]:
# Save the fetched venues; the row index is written as the first CSV column.
newyork_venues_nighlife.to_csv('NewYorkVenueNightlife.csv', index=True)

In [0]:
# Use category id 4d4b7105d754a06377d81259 Outdoor & Recreation
newyork_venues_outdoors = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=radius,  # configured search radius instead of a repeated literal
    categoryIds='4d4b7105d754a06377d81259',
)
# Only the last expression of a cell is rendered, so the former mid-cell
# .head() call displayed nothing and has been dropped.
newyork_venues_outdoors.shape

(6918, 7)

In [0]:
# Save the fetched venues; the row index is written as the first CSV column.
newyork_venues_outdoors.to_csv('NewYorkVenueOutdoors.csv', index=True)

In [0]:
# Use category id 4d4b7105d754a06378d81259 Shop & Service
newyork_venues_shopserv = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=radius,  # configured search radius instead of a repeated literal
    categoryIds='4d4b7105d754a06378d81259',
)
# Only the last expression of a cell is rendered, so the former mid-cell
# .head() call displayed nothing and has been dropped.
newyork_venues_shopserv.shape

(11318, 7)

In [0]:
# Save the fetched venues; the row index is written as the first CSV column.
newyork_venues_shopserv.to_csv('NewYorkVenueShopServ.csv', index=True)

In [0]:
# Use category id 4d4b7105d754a06379d81259 Travel & Transport
newyork_venues_travel = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=radius,  # configured search radius instead of a repeated literal
    categoryIds='4d4b7105d754a06379d81259',
)
# Only the last expression of a cell is rendered, so the former mid-cell
# .head() call displayed nothing and has been dropped.
newyork_venues_travel.shape

(6010, 7)

In [0]:
# Save the fetched venues; the row index is written as the first CSV column.
newyork_venues_travel.to_csv('NewYorkVenueTravel.csv', index=True)

In [0]:
# Use category id 4e67e38e036454776db1fb3a Residence
newyork_venues_residence = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=radius,  # configured search radius instead of a repeated literal
    categoryIds='4e67e38e036454776db1fb3a',
)
# Only the last expression of a cell is rendered, so the former mid-cell
# .head() call displayed nothing and has been dropped.
newyork_venues_residence.shape

(5403, 7)

In [0]:
# Save the fetched venues; the row index is written as the first CSV column.
newyork_venues_residence.to_csv('NewYorkVenueResidence.csv', index=True)

In [0]:
def addColumn(startDf, columnTitle, dataDf):
    """Add a per-neighborhood venue count to ``startDf`` in place.

    Parameters
    ----------
    startDf : pandas.DataFrame
        Frame with a 'Neighborhood' column; it receives the new column.
    columnTitle : str
        Name of the column to create (or overwrite) in ``startDf``.
    dataDf : pandas.DataFrame
        Venue table with 'Neighborhood' and 'Venue' columns. Non-null 'Venue'
        values are counted per neighborhood name.

    Returns
    -------
    None
        ``startDf`` is modified in place.

    Raises
    ------
    KeyError
        If ``dataDf`` lacks 'Neighborhood' or 'Venue', or ``startDf`` lacks
        'Neighborhood'. The previous bare ``except`` turned a missing 'Venue'
        column into a column of zeros.

    Notes
    -----
    Matching is by neighborhood name only, so rows of ``startDf`` that share a
    name all receive the same combined count.
    """
    venue_counts = dataDf.groupby('Neighborhood')['Venue'].count()

    # Neighborhoods absent from dataDf map to NaN and become 0. The column is
    # kept as float, which is what the row-by-row assignment used to produce.
    startDf[columnTitle] = (
        startDf['Neighborhood'].map(venue_counts).fillna(0).astype(float)
    )

In [0]:
#read csv file of geocoded neighborhood in NewYork
def _read_venue_table(csv_name):
    """Load one saved venue table, using the first CSV column as the index."""
    return pd.read_csv(csv_name, index_col=0)

# Reload every per-category venue table from the CSV files written above.
newyork_venues_arts = _read_venue_table('NewYorkVenueArts.csv')
newyork_venues_colu = _read_venue_table('NewYorkVenueColU.csv')
newyork_venues_food = _read_venue_table('NewYorkVenueFood.csv')
newyork_venues_prother = _read_venue_table('NewYorkVenueProf.csv')
newyork_venues_nightlife = _read_venue_table('NewYorkVenueNightlife.csv')
newyork_venues_outdoors = _read_venue_table('NewYorkVenueOutdoors.csv')
newyork_venues_shopserv = _read_venue_table('NewYorkVenueShopServ.csv')
newyork_venues_travel = _read_venue_table('NewYorkVenueTravel.csv')
newyork_venues_residence = _read_venue_table('NewYorkVenueResidence.csv')

In [0]:
# Work on a copy so the neighborhoods frame itself is left untouched.
df_data = neighborhoods.copy()

# (column title, venue table) pairs, in the order the columns should appear.
category_tables = [
    ('Arts', newyork_venues_arts),
    ('CollegeUniversity', newyork_venues_colu),
    ('Food', newyork_venues_food),
    ('Professional', newyork_venues_prother),
    ('Nightlife', newyork_venues_nightlife),
    ('Outdoor', newyork_venues_outdoors),
    ('ShopServices', newyork_venues_shopserv),
    ('TravelTransport', newyork_venues_travel),
    ('Residence', newyork_venues_residence),
]
for category_title, category_venues in category_tables:
    addColumn(df_data, category_title, category_venues)

df_data

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Arts,CollegeUniversity,Food,Professional,Nightlife,Outdoor,ShopServices,TravelTransport,Residence
0,Bronx,Wakefield,40.894705,-73.847201,4.0,2.0,19.0,21.0,2.0,4.0,32.0,5.0,1.0
1,Bronx,Co-op City,40.874294,-73.829939,4.0,1.0,25.0,42.0,3.0,14.0,42.0,21.0,13.0
2,Bronx,Eastchester,40.887556,-73.827806,1.0,2.0,24.0,30.0,3.0,5.0,42.0,21.0,2.0
3,Bronx,Fieldston,40.895437,-73.905643,4.0,5.0,3.0,25.0,2.0,12.0,9.0,1.0,4.0
4,Bronx,Riverdale,40.890834,-73.912585,4.0,5.0,35.0,41.0,3.0,32.0,39.0,5.0,25.0
5,Bronx,Kingsbridge,40.881687,-73.902818,8.0,10.0,50.0,49.0,27.0,40.0,50.0,44.0,26.0
6,Manhattan,Marble Hill,40.876551,-73.91066,8.0,7.0,48.0,47.0,9.0,36.0,50.0,44.0,23.0
7,Bronx,Woodlawn,40.898273,-73.867315,8.0,2.0,43.0,41.0,30.0,17.0,46.0,19.0,10.0
8,Bronx,Norwood,40.877224,-73.879391,6.0,4.0,49.0,48.0,4.0,35.0,48.0,34.0,25.0
9,Bronx,Williamsbridge,40.881039,-73.857446,6.0,7.0,37.0,38.0,12.0,9.0,34.0,8.0,5.0


In [0]:
# Save the per-neighborhood category counts; the row index is written as the first CSV column.
df_data.to_csv('NewYorkVenueCategoriesGrouped.csv', index=True)